{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating Datasets from the PDB\n",
    "\n",
    "Graphein provides a utility for curating and splitting datasets from the [RCSB PDB](https://www.rcsb.org/).\n",
    "\n",
    "\n",
    "Initialising a PDBManager will download PDB Metadata which we can use to make complex selections of protein structures.\n",
    "\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/a-r-j/graphein/blob/master/notebooks/creating_datasets_from_the_pdb.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/a-r-j/graphein/blob/master/notebooks/creating_datasets_from_the_pdb.ipynb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/30/23 20:26:33] </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING </span> To use the Graphein submodule                                         <a href=\"file:///home/atj39/github/graphein/graphein/protein/features/sequence/embeddings.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">embeddings.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/protein/features/sequence/embeddings.py#44\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">44</span></a>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         graphein.protein.features.sequence.embeddings, you need to install:   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         biovec                                                                <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         biovec cannot be installed via conda                                  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         Alternatively, you can install graphein with the extras:              <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>                                                                               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         pip install graphein<span style=\"font-weight: bold\">[</span>extras<span style=\"font-weight: bold\">]</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/30/23 20:26:33]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m To use the Graphein submodule                                         \u001b]8;id=230108;file:///home/atj39/github/graphein/graphein/protein/features/sequence/embeddings.py\u001b\\\u001b[2membeddings.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=55314;file:///home/atj39/github/graphein/graphein/protein/features/sequence/embeddings.py#44\u001b\\\u001b[2m44\u001b[0m\u001b]8;;\u001b\\\n",
       "\u001b[2;36m                    \u001b[0m         graphein.protein.features.sequence.embeddings, you need to install:   \u001b[2m                \u001b[0m\n",
       "\u001b[2;36m                    \u001b[0m         biovec                                                                \u001b[2m                \u001b[0m\n",
       "\u001b[2;36m                    \u001b[0m         biovec cannot be installed via conda                                  \u001b[2m                \u001b[0m\n",
       "\u001b[2;36m                    \u001b[0m         Alternatively, you can install graphein with the extras:              \u001b[2m                \u001b[0m\n",
       "\u001b[2;36m                    \u001b[0m                                                                               \u001b[2m                \u001b[0m\n",
       "\u001b[2;36m                    \u001b[0m         pip install graphein\u001b[1m[\u001b[0mextras\u001b[1m]\u001b[0m                                          \u001b[2m                \u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/30/23 20:26:40] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Found <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3749</span> PDB files unavailable for download.                         <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#527\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">527</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/30/23 20:26:40]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Found \u001b[1;36m3749\u001b[0m PDB files unavailable for download.                         \u001b]8;id=215005;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=591884;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#527\u001b\\\u001b[2m527\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'1bos': False, '1vvj': False, '1vy4': False, '1vy5': False, '1vy6': False, '1vy7': False, '2btj': False, '2vvj': False, '3j3q': False, '3j3y': False, '3j6b': False, '3j6x': False, '3j6y': False, '3j77': False, '3j78': False, '3j79': False, '3j7o': False, '3j7p': False, '3j7q': False, '3j7r': False, '3j8h': False, '3j92': False, '3j9g': False, '3j9k': False, '3j9l': False, '3j9m': False, '3j9r': False, '3j9w': False, '3j9y': False, '3j9z': False, '3ja1': False, '3jag': False, '3jah': False, '3jai': False, '3jaj': False, '3jan': False, '3jbn': False, '3jbo': False, '3jbp': False, '3jbu': False, '3jbv': False, '3jc1': False, '3jc8': False, '3jc9': False, '3jcd': False, '3jce': False, '3jcj': False, '3jcn': False, '3jco': False, '3jcp': False, '3jcs': False, '3jct': False, '3k1q': False, '3whe': False, '4abz': False, '4bp7': False, '4bts': False, '4ctf': False, '4ctg': False, '4d5y': False, '4d67': False, '4dcb': False, '4fqr': False, '4frt': False, '4l47': False, '4l71': False, '4lel': False, '4lfz': False, '4lnt': False, '4lsk': False, '4lt8': False, '4nwr': False, '4o9y': False, '4p6f': False, '4p70': False, '4qyk': False, '4tua': False, '4tub': False, '4tuc': False, '4tud': False, '4tue': False, '4tvx': False, '4u1u': False, '4u1v': False, '4u20': False, '4u24': False, '4u25': False, '4u26': False, '4u27': False, '4u3m': False, '4u3n': False, '4u3u': False, '4u4n': False, '4u4o': False, '4u4q': False, '4u4r': False, '4u4u': False, '4u4y': False, '4u4z': False, '4u50': False, '4u51': False, '4u52': False, '4u53': False, '4u55': False, '4u56': False, '4u6f': False, '4udf': False, '4ue3': False, '4ug0': False, '4ujc': False, '4ujd': False, '4uje': False, '4v3p': False, '4v40': False, '4v41': False, '4v42': False, '4v43': False, '4v44': False, '4v45': False, '4v46': False, '4v47': False, '4v48': False, '4v49': False, '4v4a': False, '4v4b': False, '4v4c': False, '4v4d': False, '4v4e': False, '4v4f': False, '4v4g': False, '4v4h': False, '4v4i': False, '4v4j': 
False, '4v4k': False, '4v4l': False, '4v4m': False, '4v4n': False, '4v4o': False, '4v4p': False, '4v4q': False, '4v4r': False, '4v4s': False, '4v4t': False, '4v4u': False, '4v4v': False, '4v4w': False, '4v4x': False, '4v4y': False, '4v4z': False, '4v50': False, '4v51': False, '4v52': False, '4v53': False, '4v54': False, '4v55': False, '4v56': False, '4v57': False, '4v58': False, '4v59': False, '4v5a': False, '4v5b': False, '4v5c': False, '4v5d': False, '4v5e': False, '4v5f': False, '4v5g': False, '4v5h': False, '4v5i': False, '4v5j': False, '4v5k': False, '4v5l': False, '4v5m': False, '4v5n': False, '4v5o': False, '4v5p': False, '4v5q': False, '4v5r': False, '4v5s': False, '4v5t': False, '4v5v': False, '4v5w': False, '4v5x': False, '4v5y': False, '4v5z': False, '4v60': False, '4v61': False, '4v62': False, '4v63': False, '4v64': False, '4v65': False, '4v66': False, '4v67': False, '4v68': False, '4v69': False, '4v6a': False, '4v6b': False, '4v6c': False, '4v6d': False, '4v6e': False, '4v6f': False, '4v6g': False, '4v6h': False, '4v6i': False, '4v6k': False, '4v6l': False, '4v6m': False, '4v6n': False, '4v6o': False, '4v6p': False, '4v6q': False, '4v6r': False, '4v6s': False, '4v6t': False, '4v6u': False, '4v6v': False, '4v6w': False, '4v6x': False, '4v6y': False, '4v6z': False, '4v70': False, '4v71': False, '4v72': False, '4v73': False, '4v74': False, '4v75': False, '4v76': False, '4v77': False, '4v78': False, '4v79': False, '4v7a': False, '4v7b': False, '4v7c': False, '4v7d': False, '4v7e': False, '4v7g': False, '4v7h': False, '4v7i': False, '4v7j': False, '4v7k': False, '4v7l': False, '4v7m': False, '4v7n': False, '4v7o': False, '4v7p': False, '4v7q': False, '4v7r': False, '4v7s': False, '4v7t': False, '4v7u': False, '4v7v': False, '4v7w': False, '4v7x': False, '4v7y': False, '4v7z': False, '4v81': False, '4v82': False, '4v83': False, '4v84': False, '4v85': False, '4v86': False, '4v87': False, '4v88': False, '4v89': False, '4v8a': False, '4v8b': False, '4v8c': 
False, '4v8d': False, '4v8e': False, '4v8f': False, '4v8g': False, '4v8h': False, '4v8i': False, '4v8j': False, '4v8k': False, '4v8l': False, '4v8m': False, '4v8n': False, '4v8o': False, '4v8p': False, '4v8q': False, '4v8r': False, '4v8s': False, '4v8t': False, '4v8u': False, '4v8v': False, '4v8w': False, '4v8x': False, '4v8y': False, '4v8z': False, '4v90': False, '4v91': False, '4v92': False, '4v93': False, '4v94': False, '4v95': False, '4v96': False, '4v97': False, '4v98': False, '4v99': False, '4v9a': False, '4v9b': False, '4v9c': False, '4v9d': False, '4v9e': False, '4v9f': False, '4v9g': False, '4v9h': False, '4v9i': False, '4v9j': False, '4v9k': False, '4v9l': False, '4v9m': False, '4v9n': False, '4v9o': False, '4v9p': False, '4v9q': False, '4v9r': False, '4v9s': False, '4w29': False, '4w2e': False, '4w2f': False, '4w2g': False, '4w2h': False, '4w2i': False, '4w4g': False, '4wf1': False, '4wiz': False, '4woi': False, '4wpo': False, '4wq1': False, '4wqf': False, '4wqr': False, '4wqu': False, '4wqy': False, '4wr6': False, '4wra': False, '4wro': False, '4wsd': False, '4wsm': False, '4wsn': False, '4wt1': False, '4wt8': False, '4wu1': False, '4www': False, '4wz7': False, '4wzd': False, '4wzj': False, '4wzo': False, '4xej': False, '4y4o': False, '4y4p': False, '4ybb': False, '4yd9': False, '4ym7': False, '4ypb': False, '4yuu': False, '4yzv': False, '4z3s': False, '4z8c': False, '4zer': False, '4zsn': False, '5a9z': False, '5aa0': False, '5aco': False, '5afi': False, '5aj0': False, '5aj4': False, '5apn': False, '5apo': False, '5b5m': False, '5b5n': False, '5bkl': False, '5bkn': False, '5bkq': False, '5bp4': False, '5c1a': False, '5cod': False, '5czp': False, '5d8b': False, '5dat': False, '5dc3': False, '5dfe': False, '5dge': False, '5dgf': False, '5dgv': False, '5dox': False, '5doy': False, '5e7k': False, '5e81': False, '5el4': False, '5el5': False, '5el6': False, '5el7': False, '5epi': False, '5euj': False, '5exc': False, '5f8k': False, '5fci': False, '5fcj': 
False, '5fdu': False, '5fdv': False, '5fki': False, '5fuu': False, '5gak': False, '5gjr': False, '5gky': False, '5gkz': False, '5gl0': False, '5gl1': False, '5gm6': False, '5go9': False, '5goa': False, '5gpn': False, '5gup': False, '5h4p': False, '5h5u': False, '5hau': False, '5hcp': False, '5hcq': False, '5hcr': False, '5hd1': False, '5i4l': False, '5ib7': False, '5ib8': False, '5ibb': False, '5imq': False, '5imr': False, '5ipi': False, '5ipk': False, '5iqr': False, '5it7': False, '5it8': False, '5iv5': False, '5iv7': False, '5j30': False, '5j3c': False, '5j4b': False, '5j4c': False, '5j4d': False, '5j4z': False, '5j5b': False, '5j7l': False, '5j7v': False, '5j7y': False, '5j88': False, '5j8a': False, '5j8b': False, '5j8k': False, '5j91': False, '5jc9': False, '5jcs': False, '5jte': False, '5ju8': False, '5jul': False, '5juo': False, '5jup': False, '5jus': False, '5jut': False, '5juu': False, '5kcr': False, '5kcs': False, '5kps': False, '5kpv': False, '5kpw': False, '5kpx': False, '5l1d': False, '5l3p': False, '5l4g': False, '5leg': False, '5ler': False, '5lfb': False, '5li0': False, '5lks': False, '5lqp': False, '5lyb': False, '5lza': False, '5lzb': False, '5lzc': False, '5lzd': False, '5lze': False, '5lzf': False, '5lzs': False, '5lzt': False, '5lzu': False, '5lzv': False, '5lzw': False, '5lzx': False, '5lzy': False, '5lzz': False, '5m1j': False, '5mc6': False, '5mdv': False, '5mdw': False, '5mdy': False, '5mdz': False, '5mei': False, '5mgp': False, '5mkl': False, '5mmm': False, '5mpb': False, '5mpc': False, '5mq3': False, '5mq7': False, '5mrc': False, '5mre': False, '5mrf': False, '5mx7': False, '5myj': False, '5nco': False, '5nd8': False, '5nd9': False, '5ndg': False, '5ndj': False, '5ndk': False, '5ndv': False, '5ndw': False, '5ngm': False, '5njt': False, '5nmb': False, '5np6': False, '5nrl': False, '5nwy': False, '5o09': False, '5o2r': False, '5o61': False, '5obm': False, '5ojq': False, '5on6': False, '5oql': False, '5ot7': False, '5szs': False, '5t0c': 
False, '5t15': False, '5t2a': False, '5t2c': False, '5t5h': False, '5t61': False, '5t62': False, '5t6r': False, '5t7v': False, '5t9m': False, '5t9n': False, '5t9r': False, '5t9s': False, '5t9v': False, '5ta3': False, '5tal': False, '5tam': False, '5tan': False, '5tap': False, '5taq': False, '5tas': False, '5tat': False, '5tau': False, '5tav': False, '5taw': False, '5tax': False, '5tay': False, '5taz': False, '5tb0': False, '5tb1': False, '5tb2': False, '5tb3': False, '5tb4': False, '5tbw': False, '5tcr': False, '5tcu': False, '5tga': False, '5tgm': False, '5tx1': False, '5u4i': False, '5u9f': False, '5u9g': False, '5umd': False, '5uot': False, '5uq7': False, '5uq8': False, '5urf': False, '5urw': False, '5urx': False, '5us7': False, '5us9': False, '5uyk': False, '5uyl': False, '5uym': False, '5uyn': False, '5uyp': False, '5uyq': False, '5v74': False, '5v8i': False, '5v93': False, '5vfp': False, '5vfq': False, '5vfs': False, '5vfu': False, '5vku': False, '5vlz': False, '5vp2': False, '5vpo': False, '5vpp': False, '5vyc': False, '5w4k': False, '5wdt': False, '5we4': False, '5we6': False, '5wf0': False, '5wfk': False, '5wfs': False, '5wis': False, '5wit': False, '5wju': False, '5wjv': False, '5wjw': False, '5wjx': False, '5wjy': False, '5wjz': False, '5wk5': False, '5wk6': False, '5wlc': False, '5wp9': False, '5wvi': False, '5wvk': False, '5wyj': False, '5wyk': False, '5x8p': False, '5xjc': False, '5xth': False, '5xti': False, '5xxb': False, '5xy3': False, '5y6p': False, '5ydt': False, '5yzg': False, '5z56': False, '5z57': False, '5zap': False, '5zeb': False, '5zep': False, '5zf0': False, '5zlu': False, '5zwm': False, '5zz8': False, '6ahd': False, '6awb': False, '6awc': False, '6awd': False, '6az3': False, '6b1t': False, '6b43': False, '6b4v': False, '6b9q': False, '6bcu': False, '6bcx': False, '6boh': False, '6bok': False, '6bu8': False, '6buw': False, '6bwx': False, '6bx0': False, '6bx1': False, '6by1': False, '6by7': False, '6bz6': False, '6bz7': False, '6bz8': 
False, '6c4i': False, '6c50': False, '6c5l': False, '6cae': False, '6cbe': False, '6cde': False, '6cdi': False, '6cfj': False, '6cfk': False, '6cfl': False, '6cgr': False, '6cue': False, '6cuf': False, '6czr': False, '6d90': False, '6d9j': False, '6dhe': False, '6dhf': False, '6dhg': False, '6dhh': False, '6dho': False, '6dhp': False, '6dnc': False, '6dqj': False, '6dqn': False, '6dqs': False, '6dqv': False, '6dqz': False, '6dr0': False, '6dr2': False, '6dra': False, '6drc': False, '6dwu': False, '6dzi': False, '6dzu': False, '6e2r': False, '6e2x': False, '6e2z': False, '6e30': False, '6e32': False, '6e34': False, '6e39': False, '6e8g': False, '6e9d': False, '6ek5': False, '6ekc': False, '6elz': False, '6em1': False, '6enf': False, '6enj': False, '6enu': False, '6eri': False, '6fec': False, '6fg3': False, '6fkr': False, '6foo': False, '6frk': False, '6fsz': False, '6ft6': False, '6ftg': False, '6fti': False, '6ftj': False, '6fvt': False, '6fvu': False, '6fvv': False, '6fvw': False, '6fvx': False, '6fvy': False, '6fxc': False, '6fyx': False, '6fyy': False, '6g2h': False, '6g2i': False, '6g5k': False, '6g8h': False, '6gaw': False, '6gaz': False, '6gb2': False, '6gjc': False, '6gq1': False, '6gqb': False, '6gqv': False, '6gsj': False, '6gsk': False, '6gsl': False, '6gsm': False, '6gsn': False, '6gsr': False, '6gua': False, '6gwt': False, '6gxm': False, '6gxn': False, '6gxo': False, '6gxp': False, '6gz3': False, '6gz4': False, '6gz5': False, '6gzq': False, '6gzx': False, '6gzz': False, '6h03': False, '6h4n': False, '6h58': False, '6h5i': False, '6h6e': False, '6h6f': False, '6h8k': False, '6ha1': False, '6ha8': False, '6hcf': False, '6hcj': False, '6hcm': False, '6hcq': False, '6hcr': False, '6hd7': False, '6hhq': False, '6hht': False, '6hif': False, '6hiv': False, '6hiw': False, '6hix': False, '6hiy': False, '6hiz': False, '6hrm': False, '6ht7': False, '6htq': False, '6hxx': False, '6i46': False, '6i47': False, '6i48': False, '6i49': False, '6i4a': False, '6i7o': 
False, '6i7v': False, '6i9r': False, '6ia9': False, '6igc': False, '6ip5': False, '6ip6': False, '6ip8': False, '6j0n': False, '6j2c': False, '6j2n': False, '6j2q': False, '6j2x': False, '6j30': False, '6j3y': False, '6j3z': False, '6j40': False, '6j5k': False, '6jeo': False, '6jg3': False, '6jgz': False, '6jh6': False, '6jhn': False, '6ji0': False, '6ji8': False, '6jii': False, '6jiu': False, '6jiy': False, '6jlu': False, '6jrr': False, '6jrs': False, '6jv2': False, '6k33': False, '6k3i': False, '6kad': False, '6kaf': False, '6kco': False, '6ke6': False, '6kel': False, '6kem': False, '6ken': False, '6keo': False, '6kep': False, '6keq': False, '6ker': False, '6kgx': False, '6kif': False, '6kig': False, '6kmw': False, '6kmx': False, '6l4t': False, '6l4u': False, '6l9t': False, '6lgl': False, '6lgn': False, '6lkq': False, '6lqm': False, '6lqp': False, '6lqq': False, '6lqr': False, '6lqs': False, '6lqt': False, '6lqu': False, '6lqv': False, '6lsr': False, '6lss': False, '6lsy': False, '6lu8': False, '6m2w': False, '6m62': False, '6m8p': False, '6m99': False, '6mpg': False, '6mph': False, '6msb': False, '6msd': False, '6mse': False, '6msg': False, '6msh': False, '6msj': False, '6msk': False, '6mtb': False, '6mtc': False, '6mtd': False, '6mte': False, '6mzu': False, '6mzv': False, '6mzx': False, '6mzy': False, '6n06': False, '6n07': False, '6n09': False, '6n0f': False, '6n0g': False, '6n1d': False, '6n1v': False, '6n1w': False, '6n2d': False, '6n2y': False, '6n2z': False, '6n30': False, '6n4v': False, '6n8j': False, '6n8k': False, '6n8l': False, '6n8m': False, '6n8n': False, '6n8o': False, '6n9e': False, '6n9f': False, '6nc3': False, '6ncl': False, '6nd5': False, '6nd6': False, '6ndk': False, '6nf2': False, '6nhj': False, '6nm5': False, '6nsh': False, '6nta': False, '6nu2': False, '6nu3': False, '6nuo': False, '6nwa': False, '6nwy': False, '6nxe': False, '6o2s': False, '6o2t': False, '6o3m': False, '6o8w': False, '6o8x': False, '6o8y': False, '6o8z': False, '6o90': 
False, '6o97': False, '6o9j': False, '6o9k': False, '6o9r': False, '6of1': False, '6of6': False, '6ofx': False, '6og7': False, '6ogf': False, '6ogg': False, '6ogi': False, '6oif': False, '6oig': False, '6oj2': False, '6oj3': False, '6oj4': False, '6oj5': False, '6oj6': False, '6ola': False, '6ole': False, '6olf': False, '6olg': False, '6oli': False, '6olz': False, '6om0': False, '6om6': False, '6om7': False, '6ope': False, '6ord': False, '6ore': False, '6orl': False, '6orv': False, '6osi': False, '6osk': False, '6osq': False, '6ost': False, '6osy': False, '6ot1': False, '6ot3': False, '6otr': False, '6ouo': False, '6owf': False, '6owg': False, '6oxa': False, '6oxi': False, '6p5i': False, '6p5j': False, '6p5k': False, '6p5n': False, '6pem': False, '6pep': False, '6pj6': False, '6pto': False, '6pv6': False, '6pwb': False, '6q14': False, '6q15': False, '6q16': False, '6q1f': False, '6q3g': False, '6q7l': False, '6q7m': False, '6q8y': False, '6q95': False, '6q97': False, '6q98': False, '6q9a': False, '6q9b': False, '6q9d': False, '6q9e': False, '6qa9': False, '6qbx': False, '6qc2': False, '6qc3': False, '6qc4': False, '6qc5': False, '6qc6': False, '6qc7': False, '6qc8': False, '6qc9': False, '6qca': False, '6qcf': False, '6qcm': False, '6qdv': False, '6qi5': False, '6qik': False, '6ql5': False, '6ql6': False, '6ql7': False, '6ql9': False, '6qn1': False, '6qnq': False, '6qnr': False, '6qsw': False, '6qsx': False, '6qt0': False, '6qtz': False, '6qvk': False, '6qw6': False, '6qx7': False, '6qx9': False, '6qyd': False, '6qyj': False, '6qym': False, '6qz0': False, '6qz9': False, '6qza': False, '6qzc': False, '6qzd': False, '6qzf': False, '6qzp': False, '6r0e': False, '6r21': False, '6r5q': False, '6r6g': False, '6r6p': False, '6r7q': False, '6r83': False, '6r84': False, '6r86': False, '6r87': False, '6rav': False, '6ri5': False, '6rlw': False, '6rm3': False, '6rqc': False, '6rvv': False, '6rvw': False, '6rw4': False, '6rw5': False, '6rxt': False, '6rxu': False, '6rxv': 
False, '6rxx': False, '6rxy': False, '6rxz': False, '6rzz': False, '6s05': False, '6s0x': False, '6s0z': False, '6s12': False, '6s13': False, '6s2x': False, '6s47': False, '6s5v': False, '6sg2': False, '6sg9': False, '6sga': False, '6sgb': False, '6sgc': False, '6sgg': False, '6sgl': False, '6sgm': False, '6sgn': False, '6sh1': False, '6sh2': False, '6sjt': False, '6skf': False, '6skg': False, '6skl': False, '6skw': False, '6sl9': False, '6sla': False, '6slb': False, '6slc': False, '6slh': False, '6snt': False, '6spf': False, '6spg': False, '6sqq': False, '6sqt': False, '6sqv': False, '6sr7': False, '6srv': False, '6srx': False, '6ss0': False, '6ss2': False, '6ss4': False, '6ss5': False, '6ss6': False, '6sue': False, '6suf': False, '6suj': False, '6suv': False, '6sv4': False, '6swa': False, '6swn': False, '6swo': False, '6swp': False, '6swq': False, '6swv': False, '6swz': False, '6sx4': False, '6sxo': False, '6sxs': False, '6sxu': False, '6syo': False, '6syp': False, '6syx': False, '6szd': False, '6szk': False, '6szs': False, '6t14': False, '6t17': False, '6t19': False, '6t1a': False, '6t1c': False, '6t28': False, '6t29': False, '6t2n': False, '6t2o': False, '6t2p': False, '6t2q': False, '6t2r': False, '6t2s': False, '6t42': False, '6t44': False, '6t4q': False, '6t4r': False, '6t59': False, '6t5o': False, '6t6g': False, '6t6q': False, '6t75': False, '6t7g': False, '6t7i': False, '6t7n': False, '6t7t': False, '6t83': False, '6t8s': False, '6t8u': False, '6t8v': False, '6t8w': False, '6t8y': False, '6t8z': False, '6t92': False, '6t94': False, '6t9d': False, '6t9e': False, '6t9m': False, '6t9r': False, '6ta1': False, '6tah': False, '6tai': False, '6taj': False, '6tak': False, '6tb3': False, '6tb9': False, '6tba': False, '6tbv': False, '6tby': False, '6tc3': False, '6tc5': False, '6tc7': False, '6tcl': False, '6tcx': False, '6tcy': False, '6tdu': False, '6tee': False, '6tef': False, '6teg': False, '6tf9': False, '6tg4': False, '6tg8': False, '6th6': False, '6thy': 
False, '6tib': False, '6tic': False, '6tid': False, '6tie': False, '6tig': False, '6tih': False, '6tii': False, '6tij': False, '6til': False, '6tin': False, '6tio': False, '6tix': False, '6tjj': False, '6tjk': False, '6tjq': False, '6tjs': False, '6tju': False, '6tk8': False, '6tka': False, '6tkb': False, '6tkc': False, '6tkd': False, '6tke': False, '6tkf': False, '6tkm': False, '6tkn': False, '6tkp': False, '6tkq': False, '6tkr': False, '6tks': False, '6tkz': False, '6tlu': False, '6tm4': False, '6tmg': False, '6tmj': False, '6tmk': False, '6tml': False, '6tmm': False, '6tmp': False, '6tmq': False, '6tmz': False, '6tn0': False, '6tn1': False, '6tn2': False, '6tn4': False, '6tn5': False, '6tnl': False, '6tnu': False, '6to3': False, '6to5': False, '6to9': False, '6toc': False, '6toq': False, '6tpx': False, '6tpy': False, '6tpz': False, '6tq1': False, '6tq2': False, '6tr9': False, '6trg': False, '6trv': False, '6trz': False, '6ts0': False, '6ts1': False, '6tsa': False, '6tsb': False, '6tsd': False, '6tsf': False, '6tsj': False, '6tsl': False, '6tsm': False, '6tsn': False, '6tso': False, '6tsp': False, '6tsq': False, '6tsr': False, '6tss': False, '6tsu': False, '6tsx': False, '6tt5': False, '6tt9': False, '6tu5': False, '6tu7': False, '6tud': False, '6tui': False, '6tul': False, '6tur': False, '6tvi': False, '6tvk': False, '6tvq': False, '6tvu': False, '6tvw': False, '6tw5': False, '6tw6': False, '6tw7': False, '6tw8': False, '6two': False, '6twp': False, '6twz': False, '6txq': False, '6txs': False, '6tz4': False, '6tz5': False, '6tz9': False, '6u0r': False, '6u0v': False, '6u2l': False, '6u3q': False, '6u42': False, '6u48': False, '6u5b': False, '6u5f': False, '6u5k': False, '6ucq': False, '6udj': False, '6ue0': False, '6uo1': False, '6utv': False, '6utw': False, '6utx': False, '6uty': False, '6utz': False, '6uu0': False, '6uu1': False, '6uu2': False, '6uu3': False, '6uu4': False, '6uu5': False, '6uu6': False, '6uu7': False, '6uu8': False, '6uu9': False, '6uua': 
False, '6uub': False, '6uuc': False, '6uwi': False, '6uz7': False, '6uzc': False, '6v10': False, '6v12': False, '6v1g': False, '6v1t': False, '6v1z': False, '6v39': False, '6v3a': False, '6v3b': False, '6v3d': False, '6v3e': False, '6v41': False, '6v6s': False, '6v8i': False, '6v8w': False, '6v9t': False, '6ve7': False, '6vgs': False, '6vho': False, '6vlz': False, '6vmi': False, '6vu3': False, '6vvh': False, '6vvi': False, '6vwl': False, '6vwm': False, '6vwn': False, '6vyq': False, '6vyr': False, '6vys': False, '6vyt': False, '6vyu': False, '6vyw': False, '6vyx': False, '6vyy': False, '6vyz': False, '6vz2': False, '6vz3': False, '6vz5': False, '6vz7': False, '6vzj': False, '6w19': False, '6w1n': False, '6w1o': False, '6w1p': False, '6w1q': False, '6w1r': False, '6w1t': False, '6w1u': False, '6w1v': False, '6w2s': False, '6w2t': False, '6w6p': False, '6wat': False, '6wd0': False, '6wd1': False, '6wd2': False, '6wd3': False, '6wd4': False, '6wd5': False, '6wd6': False, '6wd7': False, '6wd8': False, '6wd9': False, '6wda': False, '6wdb': False, '6wdc': False, '6wdd': False, '6wde': False, '6wdf': False, '6wdg': False, '6wdh': False, '6wdi': False, '6wdj': False, '6wdk': False, '6wdl': False, '6wdm': False, '6wet': False, '6weu': False, '6wev': False, '6wew': False, '6wfj': False, '6wft': False, '6wfu': False, '6wh3': False, '6wh7': False, '6wjd': False, '6wks': False, '6wkv': False, '6wkx': False, '6wl7': False, '6wl8': False, '6wl9': False, '6wla': False, '6wnv': False, '6wnw': False, '6woo': False, '6wot': False, '6wou': False, '6wov': False, '6ws0': False, '6ws5': False, '6wxe': False, '6wxf': False, '6wxg': False, '6x2i': False, '6x2k': False, '6x32': False, '6x33': False, '6x35': False, '6x36': False, '6x5i': False, '6x62': False, '6x63': False, '6x64': False, '6x65': False, '6x66': False, '6x6g': False, '6x6h': False, '6x6i': False, '6x6j': False, '6x6k': False, '6x6l': False, '6x6s': False, '6x6t': False, '6x6y': False, '6x7a': False, '6x7f': False, '6x7k': 
False, '6x89': False, '6x9q': False, '6xa1': False, '6xdq': False, '6xdr': False, '6xgf': False, '6xhv': False, '6xhw': False, '6xhx': False, '6xhy': False, '6xi1': False, '6xi3': False, '6xii': False, '6xij': False, '6xir': False, '6xlb': False, '6xnr': False, '6xqd': False, '6xqe': False, '6xr8': False, '6xtj': False, '6xu6': False, '6xu7': False, '6xu8': False, '6xv5': False, '6xvx': False, '6xw0': False, '6xwk': False, '6xwz': False, '6xxf': False, '6xxg': False, '6xxx': False, '6xy3': False, '6xy7': False, '6xyj': False, '6xyw': False, '6xza': False, '6xzb': False, '6xzd': False, '6xzg': False, '6xzp': False, '6xzr': False, '6y01': False, '6y0c': False, '6y0g': False, '6y0k': False, '6y0r': False, '6y0s': False, '6y25': False, '6y2i': False, '6y2j': False, '6y2l': False, '6y2m': False, '6y2t': False, '6y33': False, '6y34': False, '6y3d': False, '6y3q': False, '6y4a': False, '6y57': False, '6y5z': False, '6y60': False, '6y61': False, '6y63': False, '6y65': False, '6y66': False, '6y67': False, '6y69': False, '6y6x': False, '6y7m': False, '6y7q': False, '6y8g': False, '6y8k': False, '6y8x': False, '6y9i': False, '6y9k': False, '6yab': False, '6yaj': False, '6yak': False, '6yam': False, '6yax': False, '6yb4': False, '6yd2': False, '6yd3': False, '6ydp': False, '6ydv': False, '6ydw': False, '6yef': False, '6yf7': False, '6yf9': False, '6yfa': False, '6yfb': False, '6yfc': False, '6yfd': False, '6yfe': False, '6yff': False, '6yfg': False, '6yfh': False, '6yfj': False, '6yfk': False, '6yfl': False, '6yfm': False, '6yfn': False, '6yfo': False, '6yfp': False, '6yfq': False, '6yfr': False, '6yfs': False, '6yft': False, '6yfu': False, '6ygt': False, '6yh0': False, '6yi0': False, '6yi4': False, '6yjq': False, '6yjr': False, '6yjs': False, '6yjt': False, '6yju': False, '6yjv': False, '6ykg': False, '6yl3': False, '6ylf': False, '6ylg': False, '6ylh': False, '6ylx': False, '6yly': False, '6ylz': False, '6ymh': False, '6ymn': False, '6ymq': False, '6yn6': False, '6ynx': 
False, '6yny': False, '6ynz': False, '6yo0': False, '6yo5': False, '6yoz': False, '6yp1': False, '6ypr': False, '6ypx': False, '6ypz': False, '6yq0': False, '6yq3': False, '6yq6': False, '6yq9': False, '6yqa': False, '6yqb': False, '6yqc': False, '6yqd': False, '6yqh': False, '6yqm': False, '6yqr': False, '6yqs': False, '6yr1': False, '6yr2': False, '6yru': False, '6yrv': False, '6yrx': False, '6yrz': False, '6ys1': False, '6ys2': False, '6ysr': False, '6yss': False, '6yst': False, '6ysu': False, '6ytj': False, '6ytn': False, '6ytp': False, '6ytr': False, '6yul': False, '6yum': False, '6yut': False, '6yv3': False, '6yve': False, '6yvp': False, '6yvr': False, '6yw5': False, '6ywe': False, '6yws': False, '6ywv': False, '6ywx': False, '6ywy': False, '6yx7': False, '6yx8': False, '6yxl': False, '6yxm': False, '6yxx': False, '6yxy': False, '6yxz': False, '6yy5': False, '6yyn': False, '6yyo': False, '6yyp': False, '6yyq': False, '6yyr': False, '6yz3': False, '6yz7': False, '6yzd': False, '6yzf': False, '6yzy': False, '6z07': False, '6z0u': False, '6z14': False, '6z17': False, '6z1p': False, '6z1q': False, '6z1t': False, '6z2e': False, '6z2n': False, '6z33': False, '6z39': False, '6z3c': False, '6z3i': False, '6z3n': False, '6z3o': False, '6z3v': False, '6z4g': False, '6z4h': False, '6z4i': False, '6z4j': False, '6z4k': False, '6z4l': False, '6z4m': False, '6z4n': False, '6z4o': False, '6z4q': False, '6z4s': False, '6z4v': False, '6z5f': False, '6z5h': False, '6z5k': False, '6z5o': False, '6z5v': False, '6z66': False, '6z6b': False, '6z6c': False, '6z6j': False, '6z6k': False, '6z6l': False, '6z6m': False, '6z6n': False, '6z6s': False, '6z7f': False, '6z7g': False, '6z7l': False, '6z7m': False, '6z83': False, '6z84': False, '6z8n': False, '6z8p': False, '6za8': False, '6zb0': False, '6zb1': False, '6zb2': False, '6zb3': False, '6zba': False, '6zbp': False, '6zdk': False, '6zdl': False, '6zdm': False, '6zdw': False, '6zed': False, '6zel': False, '6zet': False, '6zeu': 
False, '6zev': False, '6zf9': False, '6zfa': False, '6zfn': False, '6zfq': False, '6zh9': False, '6zhc': False, '6zi0': False, '6zi1': False, '6zib': False, '6zic': False, '6zin': False, '6ziv': False, '6zj1': False, '6zj3': False, '6zj4': False, '6zj5': False, '6zj6': False, '6zj7': False, '6zjg': False, '6zjh': False, '6zji': False, '6zk0': False, '6zk7': False, '6zlm': False, '6zlo': False, '6zlr': False, '6zm0': False, '6zm3': False, '6zm5': False, '6zm6': False, '6zm7': False, '6zme': False, '6zmi': False, '6zmo': False, '6zmw': False, '6zmz': False, '6zn1': False, '6zna': False, '6znb': False, '6zny': False, '6znz': False, '6zo0': False, '6zo1': False, '6zo2': False, '6zo3': False, '6zon': False, '6zp4': False, '6zps': False, '6zpv': False, '6zpw': False, '6zpx': False, '6zpy': False, '6zpz': False, '6zq0': False, '6zq1': False, '6zqa': False, '6zqb': False, '6zqc': False, '6zqd': False, '6zqe': False, '6zqf': False, '6zqg': False, '6zr7': False, '6zrx': False, '6zry': False, '6zrz': False, '6zs0': False, '6zs8': False, '6zs9': False, '6zsa': False, '6zsb': False, '6zsc': False, '6zsd': False, '6zse': False, '6zsg': False, '6zsp': False, '6zsq': False, '6zsr': False, '6zt0': False, '6ztj': False, '6ztl': False, '6ztm': False, '6ztn': False, '6zto': False, '6ztp': False, '6zu1': False, '6zu2': False, '6zu5': False, '6zuj': False, '6zv5': False, '6zvj': False, '6zvk': False, '6zvm': False, '6zvn': False, '6zvr': False, '6zvs': False, '6zvt': False, '6zw4': False, '6zw5': False, '6zw6': False, '6zw7': False, '6zwv': False, '6zyt': False, '6zz5': False, '6zzc': False, '6zzh': False, '6zzr': False, '7a01': False, '7a09': False, '7a0d': False, '7a0e': False, '7a0f': False, '7a0g': False, '7a0j': False, '7a26': False, '7a27': False, '7a4f': False, '7a4g': False, '7a4h': False, '7a4i': False, '7a4j': False, '7a5a': False, '7a5f': False, '7a5g': False, '7a5h': False, '7a5i': False, '7a5j': False, '7a5k': False, '7a6w': False, '7a6x': False, '7a7d': False, '7a8w': 
False, '7a8y': False, '7a9c': False, '7a9g': False, '7a9h': False, '7a9u': False, '7a9y': False, '7a9z': False, '7aa0': False, '7aa1': False, '7aae': False, '7aai': False, '7abf': False, '7abg': False, '7abz': False, '7ac0': False, '7ac7': False, '7acj': False, '7acr': False, '7adz': False, '7ae0': False, '7ae4': False, '7aeb': False, '7aef': False, '7aek': False, '7ael': False, '7aet': False, '7aeu': False, '7aev': False, '7aew': False, '7aey': False, '7af2': False, '7agx': False, '7ah2': False, '7ah9': False, '7ahi': False, '7ahp': False, '7ahw': False, '7ahy': False, '7ahz': False, '7ai0': False, '7ai1': False, '7aia': False, '7aih': False, '7ajb': False, '7ajc': False, '7ajd': False, '7aje': False, '7ajf': False, '7ajg': False, '7ajh': False, '7aji': False, '7ajj': False, '7ajk': False, '7ajl': False, '7ajr': False, '7ajt': False, '7aju': False, '7ak4': False, '7akr': False, '7aks': False, '7all': False, '7alx': False, '7aly': False, '7am2': False, '7amx': False, '7an1': False, '7ana': False, '7anb': False, '7ane': False, '7anm': False, '7anu': False, '7aoi': False, '7aor': False, '7ap5': False, '7aph': False, '7ar4': False, '7arq': False, '7as4': False, '7asd': False, '7ase': False, '7aso': False, '7asp': False, '7ata': False, '7ath': False, '7atk': False, '7atl': False, '7av4': False, '7av5': False, '7av8': False, '7av9': False, '7avc': False, '7ax3': False, '7ayy': False, '7ayz': False, '7az0': False, '7azo': False, '7azs': False, '7b04': False, '7b0m': False, '7b0n': False, '7b0u': False, '7b1y': False, '7b20': False, '7b21': False, '7b23': False, '7b24': False, '7b25': False, '7b27': False, '7b2t': False, '7b4i': False, '7b4o': False, '7b4p': False, '7b58': False, '7b59': False, '7b5a': False, '7b5h': False, '7b5i': False, '7b5k': False, '7b69': False, '7b6a': False, '7b6c': False, '7b6g': False, '7b6s': False, '7b6t': False, '7b6u': False, '7b6v': False, '7b6w': False, '7b74': False, '7b7d': False, '7b9v': False, '7b9w': False, '7bay': False, '7bbo': 
False, '7bbp': False, '7bdz': False, '7be0': False, '7be1': False, '7be2': False, '7beb': False, '7bec': False, '7bf1': False, '7bf2': False, '7bf7': False, '7bf8': False, '7bf9': False, '7bfa': False, '7bg5': False, '7bga': False, '7bgl': False, '7bgo': False, '7bgx': False, '7bgz': False, '7bh0': False, '7bhp': False, '7bij': False, '7bk5': False, '7bk6': False, '7bk7': False, '7bkx': False, '7bl1': False, '7bl2': False, '7bl3': False, '7blh': False, '7bn2': False, '7bnm': False, '7bns': False, '7bnu': False, '7bo7': False, '7bsi': False, '7bt6': False, '7btb': False, '7bw6': False, '7bzw': False, '7bzy': False, '7cbm': False, '7cf9': False, '7cgb': False, '7cgo': False, '7ckb': False, '7ckc': False, '7coy': False, '7cpj': False, '7cpu': False, '7cpv': False, '7cr8': False, '7d4i': False, '7d5s': False, '7d5t': False, '7d63': False, '7d6z': False, '7d80': False, '7dco': False, '7dgq': False, '7dgr': False, '7dgs': False, '7dkf': False, '7dr2': False, '7drb': False, '7dwx': False, '7e80': False, '7e81': False, '7e82': False, '7eaj': False, '7egb': False, '7egc': False, '7elh': False, '7ena': False, '7enc': False, '7eq9': False, '7etj': False, '7eto': False, '7evf': False, '7ew5': False, '7ext': False, '7ey0': False, '7ey4': False, '7eyd': False, '7ezx': False, '7f25': False, '7f2n': False, '7f4v': False, '7f5s': False, '7f8i': False, '7f9o': False, '7fb1': False, '7fcf': False, '7ff7': False, '7ffe': False, '7fff': False, '7ffl': False, '7ffq': False, '7fik': False, '7fix': False, '7fj1': False, '7fj3': False, '7jgl': False, '7jgm': False, '7jgn': False, '7jil': False, '7jjj': False, '7jk9': False, '7jmf': False, '7jmg': False, '7jmh': False, '7jmi': False, '7jmj': False, '7joq': False, '7jql': False, '7jqm': False, '7jqu': False, '7jss': False, '7jsw': False, '7jsz': False, '7jt1': False, '7jt2': False, '7jt3': False, '7ju4': False, '7k00': False, '7k0s': False, '7k0t': False, '7k22': False, '7k23': False, '7k24': False, '7k50': False, '7k51': False, '7k52': 
False, '7k53': False, '7k54': False, '7k55': False, '7k58': False, '7k5b': False, '7k6v': False, '7kek': False, '7kgb': False, '7kh1': False, '7kip': False, '7kjk': False, '7kln': False, '7kp5': False, '7kr6': False, '7kts': False, '7kv2': False, '7kv3': False, '7kv4': False, '7kv5': False, '7kv6': False, '7kv7': False, '7kwc': False, '7kzm': False, '7kzo': False, '7kzp': False, '7kzq': False, '7kzr': False, '7kzs': False, '7kzt': False, '7kzv': False, '7l08': False, '7l0u': False, '7l0v': False, '7l0w': False, '7l0x': False, '7l0y': False, '7l18': False, '7l20': False, '7l2y': False, '7l5q': False, '7l5u': False, '7l6a': False, '7l6b': False, '7l6e': False, '7l6f': False, '7l6h': False, '7l6i': False, '7l7x': False, '7l7y': False, '7l7z': False, '7l81': False, '7l82': False, '7l8f': False, '7lbm': False, '7lh5': False, '7lhd': False, '7lki': False, '7lmg': False, '7lmh': False, '7lmi': False, '7lmj': False, '7lnk': False, '7lqe': False, '7lqf': False, '7lqg': False, '7lqh': False, '7lqi': False, '7ls1': False, '7ls2': False, '7ltm': False, '7lu7': False, '7lv0': False, '7lvk': False, '7m1a': False, '7m1b': False, '7m2t': False, '7m2v': False, '7m3l': False, '7m3r': False, '7m3t': False, '7m4w': False, '7m4x': False, '7m4y': False, '7m4z': False, '7m50': False, '7m54': False, '7m57': False, '7m5d': False, '7m6a': False, '7m6l': False, '7md2': False, '7md3': False, '7md7': False, '7mdz': False, '7mf0': False, '7mfj': False, '7mfm': False, '7mfn': False, '7mfs': False, '7mhe': False, '7miz': False, '7mkr': False, '7mks': False, '7mly': False, '7moq': False, '7mpi': False, '7mpj': False, '7mq8': False, '7mq9': False, '7mqa': False, '7mqt': False, '7msc': False, '7msh': False, '7msm': False, '7msz': False, '7mt0': False, '7mt2': False, '7mt3': False, '7mt7': False, '7mtg': False, '7mtp': False, '7mtw': False, '7mtz': False, '7mua': False, '7muc': False, '7mud': False, '7mue': False, '7muq': False, '7mus': False, '7muv': False, '7muw': False, '7muy': False, '7mx6': 
False, '7mxz': False, '7my1': False, '7my7': False, '7myd': False, '7n1h': False, '7n1i': False, '7n1p': False, '7n2c': False, '7n2u': False, '7n2v': False, '7n30': False, '7n31': False, '7n3m': False, '7n50': False, '7n51': False, '7n52': False, '7n61': False, '7n65': False, '7n6g': False, '7n7x': False, '7n85': False, '7n8b': False, '7n9f': False, '7n9x': False, '7na6': False, '7nac': False, '7nb1': False, '7nb2': False, '7nb3': False, '7nbc': False, '7nbd': False, '7nbf': False, '7nbg': False, '7nbh': False, '7nbr': False, '7nbs': False, '7nbt': False, '7nbu': False, '7ncx': False, '7ndq': False, '7ndt': False, '7ndu': False, '7ne9': False, '7neo': False, '7nfv': False, '7nfx': False, '7ng8': False, '7nh6': False, '7nh8': False, '7nhk': False, '7nhl': False, '7nhm': False, '7nhn': False, '7nkt': False, '7nlv': False, '7nm7': False, '7nm8': False, '7nmc': False, '7nmo': False, '7nmr': False, '7nmt': False, '7nmu': False, '7nmv': False, '7nmy': False, '7nmz': False, '7nn5': False, '7np7': False, '7npm': False, '7npn': False, '7npr': False, '7nps': False, '7npt': False, '7npu': False, '7npv': False, '7npy': False, '7npz': False, '7nq0': False, '7nq1': False, '7nq2': False, '7nq3': False, '7nq5': False, '7nq7': False, '7nq9': False, '7nqb': False, '7nqg': False, '7nqh': False, '7nql': False, '7nr2': False, '7nra': False, '7nrc': False, '7nrd': False, '7nrj': False, '7nrr': False, '7ns0': False, '7ns3': False, '7nsh': False, '7nsi': False, '7nsj': False, '7nso': False, '7nsp': False, '7nsq': False, '7nsw': False, '7nsx': False, '7nsy': False, '7nsz': False, '7nt0': False, '7ntd': False, '7nvg': False, '7nvj': False, '7nvk': False, '7nvp': False, '7nw1': False, '7nwn': False, '7nwo': False, '7nwp': False, '7nwq': False, '7nwt': False, '7nwv': False, '7nww': False, '7nwy': False, '7nxl': False, '7nxz': False, '7nyd': False, '7nyk': False, '7nyl': False, '7nym': False, '7nyn': False, '7nyo': False, '7nz2': False, '7nz3': False, '7nz4': False, '7nzb': False, '7nzc': 
False, '7nzd': False, '7nze': False, '7nzf': False, '7nzh': False, '7nzo': False, '7nzp': False, '7nzq': False, '7o00': False, '7o0u': False, '7o0v': False, '7o0w': False, '7o0x': False, '7o18': False, '7o19': False, '7o1a': False, '7o1c': False, '7o2t': False, '7o2u': False, '7o2y': False, '7o42': False, '7o4a': False, '7o5b': False, '7o5n': False, '7o5q': False, '7o5t': False, '7o5v': False, '7o5w': False, '7o6x': False, '7o76': False, '7o7y': False, '7o7z': False, '7o80': False, '7o81': False, '7o87': False, '7o9k': False, '7o9m': False, '7o9n': False, '7o9r': False, '7oa0': False, '7oa1': False, '7oa4': False, '7oao': False, '7oap': False, '7oaq': False, '7oau': False, '7oay': False, '7obo': False, '7obr': False, '7obw': False, '7ocj': False, '7od0': False, '7odj': False, '7odk': False, '7odm': False, '7odr': False, '7ods': False, '7odt': False, '7odv': False, '7oe4': False, '7oe5': False, '7oe6': False, '7oeo': False, '7oep': False, '7oer': False, '7oes': False, '7oet': False, '7of1': False, '7of2': False, '7of3': False, '7of4': False, '7of5': False, '7of6': False, '7of7': False, '7ofu': False, '7og1': False, '7og4': False, '7ogo': False, '7ogq': False, '7ogu': False, '7ogy': False, '7ogz': False, '7oh3': False, '7oho': False, '7ohp': False, '7ohq': False, '7ohr': False, '7ohs': False, '7oht': False, '7ohu': False, '7ohv': False, '7ohw': False, '7ohx': False, '7ohy': False, '7oi4': False, '7oic': False, '7oid': False, '7oif': False, '7oig': False, '7oii': False, '7oij': False, '7oil': False, '7oiq': False, '7ois': False, '7oit': False, '7oiz': False, '7oj0': False, '7oj7': False, '7ojf': False, '7ojg': False, '7ojm': False, '7ojo': False, '7ok6': False, '7ok8': False, '7oko': False, '7okr': False, '7okz': False, '7olc': False, '7old': False, '7olj': False, '7oln': False, '7olu': False, '7olw': False, '7om1': False, '7omc': False, '7omi': False, '7oms': False, '7omu': False, '7omv': False, '7omw': False, '7omx': False, '7omy': False, '7omz': False, '7on0': 
False, '7onp': False, '7oqd': False, '7oqv': False, '7or6': False, '7ord': False, '7orp': False, '7orq': False, '7orx': False, '7os3': False, '7os5': False, '7os6': False, '7os9': False, '7osa': False, '7osm': False, '7ost': False, '7ot5': False, '7otc': False, '7ou1': False, '7ou6': False, '7ou8': False, '7ouc': False, '7oud': False, '7ouj': False, '7ouo': False, '7ova': False, '7ovw': False, '7ow7': False, '7oxp': False, '7oxr': False, '7oya': False, '7oyb': False, '7oyc': False, '7oyd': False, '7oyk': False, '7oz6': False, '7oz8': False, '7oz9': False, '7oza': False, '7ozb': False, '7ozc': False, '7ozd': False, '7oze': False, '7ozf': False, '7ozt': False, '7ozy': False, '7p0k': False, '7p1n': False, '7p1p': False, '7p1t': False, '7p1y': False, '7p24': False, '7p26': False, '7p2e': False, '7p2z': False, '7p30': False, '7p32': False, '7p35': False, '7p3k': False, '7p48': False, '7p4c': False, '7p4d': False, '7p4r': False, '7p4y': False, '7p50': False, '7p5r': False, '7p5u': False, '7p5x': False, '7p5z': False, '7p6f': False, '7p6v': False, '7p6w': False, '7p6y': False, '7p6z': False, '7p75': False, '7p7i': False, '7p7n': False, '7p7o': False, '7p7q': False, '7p7r': False, '7p7s': False, '7p7t': False, '7p7u': False, '7p7w': False, '7p97': False, '7p9l': False, '7p9p': False, '7p9y': False, '7pa1': False, '7pa2': False, '7pa3': False, '7pa6': False, '7pa7': False, '7pa8': False, '7pa9': False, '7paa': False, '7pah': False, '7pai': False, '7paj': False, '7pak': False, '7pal': False, '7pam': False, '7pan': False, '7pao': False, '7paq': False, '7par': False, '7pas': False, '7pb3': False, '7pbc': False, '7pbd': False, '7pbj': False, '7pbu': False, '7pbw': False, '7pbx': False, '7pbz': False, '7pc0': False, '7pd3': False, '7pd6': False, '7pdb': False, '7pdr': False, '7pdw': False, '7pdx': False, '7pe1': False, '7pe2': False, '7pe5': False, '7pe6': False, '7peh': False, '7pei': False, '7pep': False, '7peq': False, '7pfn': False, '7pfo': False, '7pgo': False, '7pgx': 
False, '7pgy': False, '7pgz': False, '7ph0': False, '7ph9': False, '7pha': False, '7phb': False, '7phc': False, '7phm': False, '7pi1': False, '7pi4': False, '7pi5': False, '7pi8': False, '7pi9': False, '7pia': False, '7pib': False, '7pic': False, '7pil': False, '7pin': False, '7pio': False, '7pip': False, '7piq': False, '7pir': False, '7pis': False, '7pit': False, '7piw': False, '7pj3': False, '7pj4': False, '7pj5': False, '7pj6': False, '7pjo': False, '7pjs': False, '7pjt': False, '7pju': False, '7pjv': False, '7pjw': False, '7pjx': False, '7pjy': False, '7pjz': False, '7pk6': False, '7pkc': False, '7pkr': False, '7pks': False, '7pkt': False, '7pky': False, '7pkz': False, '7pl5': False, '7plf': False, '7plo': False, '7plr': False, '7pmk': False, '7pmn': False, '7pmo': False, '7pmz': False, '7pnh': False, '7pni': False, '7pnk': False, '7pnt': False, '7pnu': False, '7pnv': False, '7pnw': False, '7pnx': False, '7pny': False, '7pnz': False, '7po0': False, '7po1': False, '7po2': False, '7po3': False, '7po4': False, '7pox': False, '7pp1': False, '7ppx': False, '7pq2': False, '7pq3': False, '7pq6': False, '7pq9': False, '7pqa': False, '7pqd': False, '7pqh': False, '7pqn': False, '7pqo': False, '7pr6': False, '7prg': False, '7pri': False, '7prp': False, '7prs': False, '7psa': False, '7pse': False, '7psf': False, '7pt6': False, '7pt7': False, '7ptz': False, '7pu1': False, '7pua': False, '7pub': False, '7pus': False, '7pvi': False, '7pwb': False, '7pwg': False, '7pwh': False, '7pwi': False, '7pwj': False, '7pwo': False, '7pwx': False, '7px5': False, '7px9': False, '7pxb': False, '7pxc': False, '7pxd': False, '7pxo': False, '7pz1': False, '7pzg': False, '7pzh': False, '7pzy': False, '7q08': False, '7q0f': False, '7q0p': False, '7q0r': False, '7q14': False, '7q17': False, '7q18': False, '7q19': False, '7q1i': False, '7q1j': False, '7q1w': False, '7q20': False, '7q2n': False, '7q2o': False, '7q2p': False, '7q2t': False, '7q2u': False, '7q2w': False, '7q30': False, '7q31': 
False, '7q32': False, '7q39': False, '7q3c': False, '7q3i': False, '7q4k': False, '7q4s': False, '7q4t': False, '7q4u': False, '7q52': False, '7q5c': False, '7q5i': False, '7q5p': False, '7q5r': False, '7q5s': False, '7q5t': False, '7q5u': False, '7q5w': False, '7q63': False, '7q6h': False, '7q7p': False, '7q7q': False, '7q8d': False, '7q8f': False, '7q8g': False, '7q8h': False, '7q8i': False, '7q8j': False, '7q8k': False, '7q8l': False, '7q8m': False, '7q8n': False, '7q8o': False, '7q8p': False, '7q8q': False, '7q91': False, '7q92': False, '7q93': False, '7q9b': False, '7q9c': False, '7q9h': False, '7q9q': False, '7q9r': False, '7q9s': False, '7q9u': False, '7q9x': False, '7q9z': False, '7qan': False, '7qb1': False, '7qb5': False, '7qbh': False, '7qc6': False, '7qc7': False, '7qc8': False, '7qca': False, '7qcq': False, '7qdf': False, '7qdl': False, '7qe3': False, '7qe4': False, '7qel': False, '7qep': False, '7qf2': False, '7qf3': False, '7qf4': False, '7qf5': False, '7qf9': False, '7qfc': False, '7qff': False, '7qfh': False, '7qg8': False, '7qg9': False, '7qgf': False, '7qgg': False, '7qgh': False, '7qgn': False, '7qgq': False, '7qgr': False, '7qgu': False, '7qh4': False, '7qhi': False, '7qhj': False, '7qhk': False, '7qhs': False, '7qhv': False, '7qhw': False, '7qi4': False, '7qij': False, '7qj5': False, '7qj7': False, '7qj8': False, '7qja': False, '7qjb': False, '7qjc': False, '7qjd': False, '7qjh': False, '7qjk': False, '7ql8': False, '7qlg': False, '7qlq': False, '7qls': False, '7qnf': False, '7qnh': False, '7qni': False, '7qnj': False, '7qnp': False, '7qns': False, '7qnv': False, '7qo2': False, '7qo5': False, '7qo6': False, '7qob': False, '7qon': False, '7qor': False, '7qp6': False, '7qp7': False, '7qpk': False, '7qpw': False, '7qq0': False, '7qq8': False, '7qrh': False, '7qrk': False, '7qse': False, '7qsf': False, '7qsi': False, '7qsr': False, '7qtc': False, '7qu7': False, '7qub': False, '7quk': False, '7qum': False, '7qup': False, '7quw': False, '7quz': 
False, '7qv1': False, '7qv2': False, '7qv3': False, '7qvk': False, '7qvp': False, '7qvr': False, '7qvw': False, '7qw3': False, '7qwh': False, '7qwi': False, '7qwo': False, '7qwq': False, '7qwr': False, '7qws': False, '7qx1': False, '7qxc': False, '7qxd': False, '7qxe': False, '7qxn': False, '7qxp': False, '7qxt': False, '7qxu': False, '7qxw': False, '7qxx': False, '7qy6': False, '7qya': False, '7qyb': False, '7qyh': False, '7qyk': False, '7qyl': False, '7qym': False, '7qyx': False, '7qyy': False, '7qzm': False, '7qzx': False, '7qzy': False, '7qzz': False, '7r00': False, '7r05': False, '7r0p': False, '7r0y': False, '7r1g': False, '7r1n': False, '7r1o': False, '7r1p': False, '7r1q': False, '7r1x': False, '7r2y': False, '7r2z': False, '7r30': False, '7r31': False, '7r32': False, '7r3d': False, '7r3n': False, '7r55': False, '7r5c': False, '7r5j': False, '7r5k': False, '7r5t': False, '7r5x': False, '7r6j': False, '7r7a': False, '7r81': False, '7r8u': False, '7rd1': False, '7rd2': False, '7rev': False, '7rf1': False, '7rf2': False, '7rf3': False, '7rf4': False, '7rf5': False, '7rf6': False, '7rf7': False, '7rf8': False, '7ri8': False, '7rk8': False, '7rk9': False, '7rl0': False, '7rl1': False, '7rl5': False, '7rmc': False, '7rmf': False, '7rmk': False, '7rmo': False, '7rnl': False, '7rnr': False, '7rq8': False, '7rq9': False, '7rqa': False, '7rqb': False, '7rqc': False, '7rqd': False, '7rqe': False, '7rr5': False, '7rro': False, '7rs5': False, '7rs6': False, '7rsx': False, '7rsy': False, '7rsz': False, '7rte': False, '7rti': False, '7rwl': False, '7rwt': False, '7rxy': False, '7ry5': False, '7ryd': False, '7ryf': False, '7ryg': False, '7ryh': False, '7ryj': False, '7ryk': False, '7rz0': False, '7rz2': False, '7s0p': False, '7s0s': False, '7s1g': False, '7s1h': False, '7s1i': False, '7s1j': False, '7s1k': False, '7s1w': False, '7s64': False, '7s78': False, '7s9v': False, '7sa4': False, '7sbq': False, '7sc7': False, '7sc8': False, '7sc9': False, '7sca': False, '7scb': 
False, '7scc': False, '7sfd': False, '7sfr': False, '7sn4': False, '7sn9': False, '7soe': False, '7sof': False, '7som': False, '7sp4': False, '7spb': False, '7spc': False, '7spi': False, '7spj': False, '7spk': False, '7spu': False, '7sqc': False, '7sqd': False, '7sqf': False, '7sqh': False, '7sqq': False, '7sqt': False, '7ss9': False, '7ssd': False, '7ssh': False, '7ssl': False, '7ssn': False, '7sso': False, '7ssw': False, '7st2': False, '7st3': False, '7st6': False, '7st7': False, '7stg': False, '7su4': False, '7su7': False, '7suk': False, '7sxn': False, '7sxr': False, '7sxx': False, '7sxz': False, '7sy1': False, '7sy7': False, '7t0w': False, '7t0z': False, '7t1w': False, '7t1x': False, '7t3p': False, '7t3q': False, '7t3r': False, '7t3t': False, '7t64': False, '7t65': False, '7t66': False, '7t68': False, '7t6w': False, '7t73': False, '7t74': False, '7t75': False, '7t76': False, '7t77': False, '7t7c': False, '7t81': False, '7t8n': False, '7t8o': False, '7t8v': False, '7t9a': False, '7t9b': False, '7t9e': False, '7tau': False, '7tbi': False, '7tbj': False, '7tbk': False, '7tbl': False, '7tbm': False, '7tcv': False, '7td9': False, '7tdg': False, '7tdh': False, '7tdi': False, '7tdj': False, '7tdk': False, '7tdv': False, '7tdz': False, '7tei': False, '7ten': False, '7tf6': False, '7tf7': False, '7tfa': False, '7tfb': False, '7tfc': False, '7tfd': False, '7tfo': False, '7tgh': False, '7thr': False, '7ti4': False, '7ti5': False, '7tjs': False, '7tjt': False, '7tjv': False, '7tjy': False, '7tk0': False, '7tk3': False, '7tk6': False, '7tkm': False, '7tkq': False, '7tms': False, '7tmt': False, '7tnb': False, '7tnq': False, '7tns': False, '7tnt': False, '7tok': False, '7too': False, '7top': False, '7toq': False, '7tor': False, '7tos': False, '7tr6': False, '7tr8': False, '7tr9': False, '7tra': False, '7tw2': False, '7tzc': False, '7u05': False, '7u06': False, '7u0h': False, '7u0l': False, '7u0p': False, '7u0q': False, '7u1i': False, '7u1j': False, '7u2h': False, '7u2i': 
False, '7u2j': False, '7u4p': False, '7u4t': False, '7u6f': False, '7u71': False, '7u8c': False, '7u8o': False, '7u8p': False, '7u8q': False, '7u8r': False, '7u94': False, '7u95': False, '7u96': False, '7u97': False, '7u9q': False, '7u9r': False, '7u9t': False, '7u9x': False, '7u9z': False, '7ua1': False, '7ua3': False, '7ua4': False, '7ua5': False, '7ua9': False, '7uc3': False, '7ucj': False, '7uck': False, '7ud4': False, '7ueb': False, '7ug6': False, '7ug7': False, '7uhz': False, '7ui0': False, '7ui9': False, '7uif': False, '7uig': False, '7uio': False, '7uli': False, '7ums': False, '7umt': False, '7un1': False, '7unc': False, '7und': False, '7une': False, '7unf': False, '7ung': False, '7unr': False, '7unu': False, '7unv': False, '7unw': False, '7uom': False, '7uoo': False, '7uph': False, '7uqb': False, '7uqz': False, '7usa': False, '7uti': False, '7utl': False, '7uvl': False, '7uw9': False, '7uwa': False, '7uwb': False, '7uwc': False, '7uwd': False, '7uxh': False, '7uxx': False, '7uxz': False, '7v08': False, '7v0k': False, '7v0t': False, '7v3u': False, '7v3v': False, '7v7h': False, '7v7i': False, '7v7j': False, '7v93': False, '7va9': False, '7vai': False, '7vaj': False, '7vak': False, '7val': False, '7vam': False, '7van': False, '7vao': False, '7vap': False, '7vaq': False, '7var': False, '7vas': False, '7vat': False, '7vau': False, '7vav': False, '7vaw': False, '7vax': False, '7vay': False, '7vb0': False, '7vb9': False, '7vba': False, '7vbb': False, '7vbc': False, '7vci': False, '7vd5': False, '7vd6': False, '7vea': False, '7vml': False, '7vmm': False, '7vmn': False, '7vmo': False, '7vmp': False, '7vmq': False, '7vmr': False, '7vms': False, '7vop': False, '7vor': False, '7vot': False, '7vrt': False, '7vs5': False, '7vu9': False, '7vub': False, '7vul': False, '7vum': False, '7vw7': False, '7vwy': False, '7vy2': False, '7w1c': False, '7w1d': False, '7w1e': False, '7w1y': False, '7w37': False, '7w38': False, '7w39': False, '7w3a': False, '7w3b': False, '7w3c': 
False, '7w3f': False, '7w3g': False, '7w3h': False, '7w3i': False, '7w3j': False, '7w3k': False, '7w3m': False, '7w5b': False, '7w5z': False, '7w85': False, '7w8g': False, '7wfd': False, '7wfe': False, '7wg5': False, '7wjw': False, '7wns': False, '7wot': False, '7wpr': False, '7wps': False, '7wqo': False, '7wqt': False, '7wtl': False, '7wtm': False, '7wtn': False, '7wto': False, '7wtp': False, '7wtq': False, '7wtr': False, '7wvh': False, '7wz8': False, '7x2g': False, '7x37': False, '7x38': False, '7x3f': False, '7x4k': False, '7x5a': False, '7x5t': False, '7x7q': False, '7xi9': False, '7xib': False, '7xm1': False, '7xn7': False, '7xse': False, '7xsx': False, '7xsz': False, '7xt7': False, '7xtd': False, '7xti': False, '7y4l': False, '7y5e': False, '7y6s': False, '7y7a': False, '7ycx': False, '7yfz': False, '7yla': False, '7yvq': False, '7ywt': False, '7yyp': False, '7yzh': False, '7yzs': False, '7z0n': False, '7z0p': False, '7z13': False, '7z1d': False, '7z1e': False, '7z1l': False, '7z1m': False, '7z1o': False, '7z1r': False, '7z34': False, '7z37': False, '7z3a': False, '7z3k': False, '7z3l': False, '7z3y': False, '7z43': False, '7z49': False, '7z4b': False, '7z4o': False, '7z55': False, '7z56': False, '7z5d': False, '7z5e': False, '7z5f': False, '7z5o': False, '7z5s': False, '7z5t': False, '7z62': False, '7z63': False, '7z66': False, '7z6t': False, '7z89': False, '7z8a': False, '7z8f': False, '7z9a': False, '7z9b': False, '7z9h': False, '7z9i': False, '7z9j': False, '7z9n': False, '7z9o': False, '7z9s': False, '7z9u': False, '7z9w': False, '7z9y': False, '7za6': False, '7za7': False, '7za8': False, '7za9': False, '7zaa': False, '7zab': False, '7zac': False, '7zad': False, '7zae': False, '7zaf': False, '7zaj': False, '7zal': False, '7zam': False, '7zar': False, '7zat': False, '7zaz': False, '7zb2': False, '7zc0': False, '7ze0': False, '7ze7': False, '7ze9': False, '7zen': False, '7zfn': False, '7zfo': False, '7zfs': False, '7zft': False, '7zfu': False, '7zfw': 
False, '7zfy': False, '7zfz': False, '7zg1': False, '7zg2': False, '7zg8': False, '7zgx': False, '7zgy': False, '7zhj': False, '7zil': False, '7zim': False, '7zin': False, '7zio': False, '7zip': False, '7ziq': False, '7zjw': False, '7zjx': False, '7zn2': False, '7zn4': False, '7zpq': False, '7zq9': False, '7zqb': False, '7zqc': False, '7zqd': False, '7zqn': False, '7zqo': False, '7zra': False, '7zrs': False, '7zs5': False, '7zs6': False, '7zts': False, '7zuf': False, '7zuh': False, '7zui': False, '7zuj': False, '7zuk': False, '7zul': False, '7zus': False, '7zuw': False, '7zux': False, '7zw0': False, '7zwd': False, '7zx0': False, '7zx1': False, '7zx7': False, '7zx8': False, '8a1d': False, '8a1s': False, '8a22': False, '8a2l': False, '8a2m': False, '8a3d': False, '8a3k': False, '8a47': False, '8a5n': False, '8a60': False, '8a9a': False, '8aa5': False, '8aaf': False, '8agb': False, '8agh': False, '8agk': False, '8ago': False, '8agt': False, '8agu': False, '8agv': False, '8agw': False, '8agx': False, '8agz': False, '8ah0': False, '8ah1': False, '8aij': False, '8aiy': False, '8aj4': False, '8aj5': False, '8akn': False, '8alp': False, '8am9': False, '8amt': False, '8anx': False, '8aoz': False, '8ap0': False, '8ap6': False, '8apa': False, '8apb': False, '8apc': False, '8apd': False, '8ape': False, '8apf': False, '8apg': False, '8aph': False, '8apj': False, '8apk': False, '8apn': False, '8ash': False, '8ask': False, '8atj': False, '8atm': False, '8ato': False, '8atu': False, '8atx': False, '8au1': False, '8auk': False, '8auw': False, '8axk': False, '8axl': False, '8axn': False, '8aye': False, '8b0b': False, '8b0c': False, '8b0d': False, '8b0e': False, '8b0f': False, '8b0g': False, '8b0h': False, '8b0x': False, '8b2a': False, '8b2b': False, '8b2c': False, '8b4n': False, '8b5g': False, '8b5h': False, '8b5i': False, '8b5j': False, '8b5l': False, '8b6c': False, '8b7l': False, '8b7o': False, '8b7y': False, '8bac': False, '8bfl': False, '8bfp': False, '8bip': False, '8bjq': 
False, '8bov': False, '8boy': False, '8bp2': False, '8bpx': False, '8bq5': False, '8bqd': False, '8bqx': False, '8br8': False, '8brm': False, '8bsi': False, '8bsj': False, '8btd': False, '8btr': False, '8bxu': False, '8bxv': False, '8bxw': False, '8bxx': False, '8c5c': False, '8cen': False, '8ceo': False, '8ci0': False, '8crx': False, '8cs9': False, '8csl': False, '8csy': False, '8cue': False, '8cvj': False, '8cvk': False, '8cvl': False, '8cvt': False, '8cw4': False, '8cwm': False, '8cxm': False, '8cxp': False, '8d3c': False, '8d3d': False, '8d6x': False, '8d6y': False, '8d9t': False, '8d9u': False, '8d9v': False, '8dbp': False, '8dbq': False, '8dbr': False, '8dbt': False, '8dbu': False, '8dbv': False, '8dbw': False, '8dd3': False, '8de6': False, '8dli': False, '8dlt': False, '8dlu': False, '8dlx': False, '8dlz': False, '8dp8': False, '8dp9': False, '8dpa': False, '8dv6': False, '8dvd': False, '8dwb': False, '8e2d': False, '8e2e': False, '8e2i': False, '8e2k': False, '8e3j': False, '8e3p': False, '8e4i': False, '8e4k': False, '8e4z': False, '8e52': False, '8e53': False, '8e54': False, '8e5u': False, '8e6g': False, '8e73': False, '8eaf': False, '8eag': False, '8eah': False, '8eai': False, '8eaj': False, '8eak': False, '8eal': False, '8eam': False, '8eaq': False, '8ecw': False, '8ed0': False, '8edk': False, '8edx': False, '8eey': False, '8egv': False, '8ehp': False, '8eid': False, '8eiu': False, '8ej4': False, '8ekb': False, '8ekc': False, '8ekf': False, '8ekn': False, '8ele': False, '8emy': False, '8en1': False, '8en4': False, '8en5': False, '8en6': False, '8ep2': False, '8ep9': False, '8epj': False, '8epo': False, '8epr': False, '8eq4': False, '8eq7': False, '8eq9': False, '8eqd': False, '8eqe': False, '8eqx': False, '8er4': False, '8esq': False, '8esr': False, '8eth': False, '8etl': False, '8eto': False, '8eu8': False, '8eud': False, '8eug': False, '8eui': False, '8eur': False, '8eut': False, '8eux': False, '8fbd': False, '8fbf': False, '8fbl': False, '8fjk': 
False, '8fjl': False, '8fnw': False, '8fq4': False, '8fv5': False, '8fwi': False, '8g08': False, '8g09': False, '8g0a': False, '8g0d': False, '8g0e': False, '8g5e': False, '8gms': False, '8gpu': False, '8gwa': False, '8gxq': False, '8gxs': False, '8gxu': False, '8gxw': False, '8gxx': False, '8gxy': False, '8gxz': False, '8h2i': False, '8hdr': False, '8hr7': False, '8ika': False}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #000080; text-decoration-color: #000080\">╭────────────────────────────── </span><span style=\"color: #000080; text-decoration-color: #000080; font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">class</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #008000; text-decoration-color: #008000\">'graphein.ml.datasets.pdb_data.PDBManager'</span><span style=\"color: #000080; text-decoration-color: #000080; font-weight: bold\">&gt;</span><span style=\"color: #000080; text-decoration-color: #000080\"> ───────────────────────────────╮</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span> <span style=\"color: #008080; text-decoration-color: #008080\">A utility for creating selections of experimental PDB structures.</span>                                               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span> <span style=\"color: #008000; text-decoration-color: #008000\">╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────╮</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span> <span style=\"color: #008000; text-decoration-color: #008000\">│</span> <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">graphein.ml.datasets.pdb_data.PDBManager</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0x7f63a02e5190</span><span style=\"font-weight: bold\">&gt;</span>                                         <span style=\"color: #008000; text-decoration-color: #008000\">│</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span> <span style=\"color: #008000; text-decoration-color: #008000\">╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                     <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">df</span> =             id   pdb chain  length molecule_type  \\                    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>       100d_A  100d     A      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span>            na                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>       100d_B  100d     B      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span>            na                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>       101d_A  101d     A      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>            na                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>       101d_B  101d     B      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>            na                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>       101m_A  101m     A     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">154</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>        <span style=\"color: #808000; text-decoration-color: #808000\">...</span>   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>     <span style=\"color: #808000; text-decoration-color: #808000\">...</span>           <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>  9xia_A  9xia     A     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">388</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>  9xim_A  9xim     A     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">393</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>  9xim_B  9xim     B     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">393</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>  9xim_C  9xim     C     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">393</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>  9xim_D  9xim     D     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">393</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                               name  \\           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>       DNA/RNA <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span>'-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">R</span><span style=\"font-weight: bold\">(</span>*CP*<span style=\"font-weight: bold\">)</span>-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">D</span><span style=\"font-weight: bold\">(</span>*CP*GP*GP*CP*GP*CP*CP*GP<span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>       DNA/RNA <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span>'-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">R</span><span style=\"font-weight: bold\">(</span>*CP*<span style=\"font-weight: bold\">)</span>-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">D</span><span style=\"font-weight: bold\">(</span>*CP*GP*GP*CP*GP*CP*CP*GP<span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>       DNA <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span>'-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">D</span><span style=\"font-weight: bold\">(</span>*CP*GP*CP*GP*AP*AP*TP*TP*<span style=\"font-weight: bold\">(</span>CBR<span style=\"font-weight: bold\">)</span>P*GP*C<span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>       DNA <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span>'-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">D</span><span style=\"font-weight: bold\">(</span>*CP*GP*CP*GP*AP*AP*TP*TP*<span style=\"font-weight: bold\">(</span>CBR<span style=\"font-weight: bold\">)</span>P*GP*C<span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                               MYOGLOBIN              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                                   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>                                   XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>                                 D-XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>                                 D-XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>                                 D-XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>                                 D-XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                           sequence split        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          n_chains  \\                                                            <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>                                              CCGGCGCCGG   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                                              CCGGCGCCGG   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                            CGCGAATTCGCG   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>                                            CGCGAATTCGCG   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>       MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                                   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                                                    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>  MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                          ligands                      source  resolution  \\     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>                 <span style=\"font-weight: bold\">[</span>SPM<span style=\"font-weight: bold\">]</span>                                    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.90</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                 <span style=\"font-weight: bold\">[</span>SPM<span style=\"font-weight: bold\">]</span>                                    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.90</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>         <span style=\"font-weight: bold\">[</span>CBR, MG, NT<span style=\"font-weight: bold\">]</span>                                    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.25</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>         <span style=\"font-weight: bold\">[</span>CBR, MG, NT<span style=\"font-weight: bold\">]</span>                                    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.25</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>       <span style=\"font-weight: bold\">[</span>HEM, NBN, SO4<span style=\"font-weight: bold\">]</span>            Physeter catodon        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.07</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                 <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                         <span style=\"color: #808000; text-decoration-color: #808000\">...</span>         <span style=\"color: #808000; text-decoration-color: #808000\">...</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>        <span style=\"font-weight: bold\">[</span>DFR, MN<span style=\"font-weight: bold\">]</span>    Streptomyces rubiginosus        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.90</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>        <span style=\"font-weight: bold\">[</span>MN, XLS<span style=\"font-weight: bold\">]</span>  Actinoplanes missouriensis        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.40</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>        <span style=\"font-weight: bold\">[</span>MN, XLS<span style=\"font-weight: bold\">]</span>  Actinoplanes missouriensis        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.40</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>        <span style=\"font-weight: bold\">[</span>MN, XLS<span style=\"font-weight: bold\">]</span>  Actinoplanes missouriensis        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.40</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>        <span style=\"font-weight: bold\">[</span>MN, XLS<span style=\"font-weight: bold\">]</span>  Actinoplanes missouriensis        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.40</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                 deposition_date experiment_type  pdb_file_available             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1994</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">05</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1994</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">05</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1994</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1994</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1997</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">13</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                <span style=\"color: #808000; text-decoration-color: #808000\">...</span>             <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                 <span style=\"color: #808000; text-decoration-color: #808000\">...</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1990</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">11</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1992</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">04</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1992</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">04</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1992</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">04</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1992</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">04</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"font-weight: bold\">[</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780162</span> rows x <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> columns<span style=\"font-weight: bold\">]</span>                                             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                    <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">ligand_map_filename</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'cc-to-pdb.tdd'</span>                                                        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                         <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">ligand_map_url</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'http://ligand-expo.rcsb.org/dictionaries/cc-to-pdb.tdd'</span>               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                           <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">list_columns</span> = <span style=\"font-weight: bold\">[</span><span style=\"color: #008000; text-decoration-color: #008000\">'ligands'</span><span style=\"font-weight: bold\">]</span>                                                            <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>              <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_availability_filename</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'pdb_bundle_index.txt'</span>                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                   <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_availability_url</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'https://files.wwpdb.org/pub/pdb/compatible/pdb_bundle/pdb_bundle_ind…</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>           <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_deposition_date_filename</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'entries.idx'</span>                                                          <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_deposition_date_url</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'https://files.wwpdb.org/pub/pdb/derived_data/index/entries.idx'</span>       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_dir</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">PosixPath</span><span style=\"font-weight: bold\">(</span><span style=\"color: #008000; text-decoration-color: #008000\">'pdb'</span><span style=\"font-weight: bold\">)</span>                                                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_entry_type_filename</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'pdb_entry_type.txt'</span>                                                   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                     <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_entry_type_url</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'https://files.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt'</span>      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>            <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_seqres_archive_filename</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'pdb_seqres.txt.gz'</span>                                                    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                    <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_seqres_filename</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'pdb_seqres.txt'</span>                                                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                      <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">pdb_sequences_url</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'https://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt.gz'</span>         <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                    <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">resolution_filename</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'resolu.idx'</span>                                                           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                         <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">resolution_url</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'https://files.wwpdb.org/pub/pdb/derived_data/index/resolu.idx'</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                               <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">root_dir</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">PosixPath</span><span style=\"font-weight: bold\">(</span><span style=\"color: #008000; text-decoration-color: #008000\">'.'</span><span style=\"font-weight: bold\">)</span>                                                         <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                 <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">source</span> =             id   pdb chain  length molecule_type  \\                    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>       100d_A  100d     A      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span>            na                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>       100d_B  100d     B      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span>            na                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>       101d_A  101d     A      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>            na                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>       101d_B  101d     B      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>            na                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>       101m_A  101m     A     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">154</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>        <span style=\"color: #808000; text-decoration-color: #808000\">...</span>   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>     <span style=\"color: #808000; text-decoration-color: #808000\">...</span>           <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>  9xia_A  9xia     A     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">388</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>  9xim_A  9xim     A     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">393</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>  9xim_B  9xim     B     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">393</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>  9xim_C  9xim     C     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">393</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>  9xim_D  9xim     D     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">393</span>       protein                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                               name  \\           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>       DNA/RNA <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span>'-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">R</span><span style=\"font-weight: bold\">(</span>*CP*<span style=\"font-weight: bold\">)</span>-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">D</span><span style=\"font-weight: bold\">(</span>*CP*GP*GP*CP*GP*CP*CP*GP<span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>       DNA/RNA <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span>'-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">R</span><span style=\"font-weight: bold\">(</span>*CP*<span style=\"font-weight: bold\">)</span>-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">D</span><span style=\"font-weight: bold\">(</span>*CP*GP*GP*CP*GP*CP*CP*GP<span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>       DNA <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span>'-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">D</span><span style=\"font-weight: bold\">(</span>*CP*GP*CP*GP*AP*AP*TP*TP*<span style=\"font-weight: bold\">(</span>CBR<span style=\"font-weight: bold\">)</span>P*GP*C<span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>       DNA <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span>'-<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">D</span><span style=\"font-weight: bold\">(</span>*CP*GP*CP*GP*AP*AP*TP*TP*<span style=\"font-weight: bold\">(</span>CBR<span style=\"font-weight: bold\">)</span>P*GP*C<span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                               MYOGLOBIN              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                                   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>                                   XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>                                 D-XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>                                 D-XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>                                 D-XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>                                 D-XYLOSE ISOMERASE              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                           sequence split        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          n_chains  \\                                                            <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>                                              CCGGCGCCGG   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                                              CCGGCGCCGG   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                            CGCGAATTCGCG   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>                                            CGCGAATTCGCG   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>       MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                                   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>   <span style=\"color: #808000; text-decoration-color: #808000\">...</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                                                    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>  MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG<span style=\"color: #808000; text-decoration-color: #808000\">...</span>   N/A        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>                                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                          ligands                      source  resolution  \\     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>                 <span style=\"font-weight: bold\">[</span>SPM<span style=\"font-weight: bold\">]</span>                                    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.90</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                 <span style=\"font-weight: bold\">[</span>SPM<span style=\"font-weight: bold\">]</span>                                    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.90</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>         <span style=\"font-weight: bold\">[</span>CBR, MG, NT<span style=\"font-weight: bold\">]</span>                                    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.25</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>         <span style=\"font-weight: bold\">[</span>CBR, MG, NT<span style=\"font-weight: bold\">]</span>                                    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.25</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>       <span style=\"font-weight: bold\">[</span>HEM, NBN, SO4<span style=\"font-weight: bold\">]</span>            Physeter catodon        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.07</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                 <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                         <span style=\"color: #808000; text-decoration-color: #808000\">...</span>         <span style=\"color: #808000; text-decoration-color: #808000\">...</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>        <span style=\"font-weight: bold\">[</span>DFR, MN<span style=\"font-weight: bold\">]</span>    Streptomyces rubiginosus        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.90</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>        <span style=\"font-weight: bold\">[</span>MN, XLS<span style=\"font-weight: bold\">]</span>  Actinoplanes missouriensis        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.40</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>        <span style=\"font-weight: bold\">[</span>MN, XLS<span style=\"font-weight: bold\">]</span>  Actinoplanes missouriensis        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.40</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>        <span style=\"font-weight: bold\">[</span>MN, XLS<span style=\"font-weight: bold\">]</span>  Actinoplanes missouriensis        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.40</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>        <span style=\"font-weight: bold\">[</span>MN, XLS<span style=\"font-weight: bold\">]</span>  Actinoplanes missouriensis        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.40</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                 deposition_date experiment_type  pdb_file_available             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1994</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">05</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1994</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">05</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1994</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1994</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span>           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1997</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">13</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                <span style=\"color: #808000; text-decoration-color: #808000\">...</span>             <span style=\"color: #808000; text-decoration-color: #808000\">...</span>                 <span style=\"color: #808000; text-decoration-color: #808000\">...</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780157</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1990</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">11</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780158</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1992</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">04</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780159</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1992</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">04</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780160</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1992</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">04</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780161</span>      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1992</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">04</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>     diffraction                <span style=\"color: #00ff00; text-decoration-color: #00ff00; font-style: italic\">True</span>             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                                                                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"font-weight: bold\">[</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">780162</span> rows x <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> columns<span style=\"font-weight: bold\">]</span>                                             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                    <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">source_map_filename</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'source.idx'</span>                                                           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                         <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">source_map_url</span> = <span style=\"color: #008000; text-decoration-color: #008000\">'https://files.wwpdb.org/pub/pdb/derived_data/index/source.idx'</span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                        <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">splits_provided</span> = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span>                                                                  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">cluster</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">cluster</span><span style=\"font-weight: bold\">(</span>min_seq_id: float = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.3</span>, coverage: float = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.8</span>, update:    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span>, fasta_fname: Optional<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, cluster_fname:        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Optional<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, overwrite: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt;                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Union<span style=\"font-weight: bold\">[</span>pandas.core.frame.DataFrame, Dict<span style=\"font-weight: bold\">[</span>str,                           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame<span style=\"font-weight: bold\">]]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Cluster sequences in selection using </span>   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">MMseqs2.</span>                                                               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                         <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">compare_length</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">compare_length</span><span style=\"font-weight: bold\">(</span>length: int, comparison: str = <span style=\"color: #008000; text-decoration-color: #008000\">'equal'</span>,             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          compare_pdb_groups: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span>, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>,  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules with a given length.</span>           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                      <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">download_metadata</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">download_metadata</span><span style=\"font-weight: bold\">()</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Download all PDB metadata.</span>                    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                          <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">download_pdbs</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">download_pdbs</span><span style=\"font-weight: bold\">(</span><span style=\"color: #808000; text-decoration-color: #808000\">out_dir</span>=<span style=\"color: #008000; text-decoration-color: #008000\">'.'</span>, <span style=\"color: #808000; text-decoration-color: #808000\">format</span>=<span style=\"color: #008000; text-decoration-color: #008000\">'pdb'</span>, splits:                   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, overwrite: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span>, max_workers: int  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span>, chunksize: int = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">32</span><span style=\"font-weight: bold\">)</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Download PDB files in the current </span>          <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">selection.</span>                                                             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                        <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">experiment_type</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">experiment_type</span><span style=\"font-weight: bold\">(</span>type: str = <span style=\"color: #008000; text-decoration-color: #008000\">'diffraction'</span>, splits:                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt;                   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules by experiment type. </span>     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f; font-weight: bold\">[</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">`diffraction`, `NMR`, `EM`, `other`</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f; font-weight: bold\">]</span>                                  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                            <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">export_pdbs</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">export_pdbs</span><span style=\"font-weight: bold\">(</span>pdb_dir: str, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>,      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          max_num_chains_per_pdb_code: int = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>, force: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Write the </span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">selection as a collection of PDB files.</span>                                <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>              <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">filter_by_deposition_date</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">filter_by_deposition_date</span><span style=\"font-weight: bold\">(</span>max_deposition_date: numpy.datetime64,   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; Union<span style=\"font-weight: bold\">[</span>pandas.core.frame.DataFrame, Dict<span style=\"font-weight: bold\">[</span>str,  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame<span style=\"font-weight: bold\">]]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules deposited on or before</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">a given date.</span>                                                          <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                             <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">from_fasta</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">from_fasta</span><span style=\"font-weight: bold\">(</span>ids: str, filename: str, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> =  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Create a selection from a FASTA </span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">file.</span>                                                                  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                    <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_best_resolution</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_best_resolution</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt; float:  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return the best resolution in the dataset.</span>                             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                   <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_experiment_types</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_experiment_types</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt;        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return list of different experiment types in the dataset.</span>   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                      <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_longest_chain</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_longest_chain</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt; int:      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return the length of the longest chain in the dataset.</span>                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                     <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_molecule_names</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_molecule_names</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt;          <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return list of molecule names in the dataset.</span>               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                     <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_molecule_types</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_molecule_types</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt;          <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return list of different molecule types in the dataset.</span>     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                         <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_num_chains</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_num_chains</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt; int: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return </span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">the number of chains in the dataset.</span>                                   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                    <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_num_unique_pdbs</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_num_unique_pdbs</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt; int:    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return the number of unique PDB IDs in the dataset.</span>                    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                     <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_shortest_chain</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_shortest_chain</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt; int:     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return the length of the shortest chain in the dataset.</span>                <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                             <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_splits</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_splits</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, df_splits:          <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Optional<span style=\"font-weight: bold\">[</span>Dict<span style=\"font-weight: bold\">[</span>str, pandas.core.frame.DataFrame<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, source: bool  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return DataFrame entries </span>     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">belonging to the splits given.</span>                                         <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>              <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_unavailable_pdb_files</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_unavailable_pdb_files</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt;   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Returns a list of PDB files unavailable for download.</span>       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                        <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_unique_pdbs</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_unique_pdbs</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt; List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span>:  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return a list of unique PDB IDs in the dataset.</span>                        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                   <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">get_worst_resolution</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">get_worst_resolution</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span> -&gt; float: <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return the worst resolution in the dataset.</span>                            <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                             <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">has_ligand</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">has_ligand</span><span style=\"font-weight: bold\">(</span>ligand: str, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>,        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">that contain a given ligand.</span>                                           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                            <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">has_ligands</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">has_ligands</span><span style=\"font-weight: bold\">(</span>ligands: List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span>, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> =      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, inverse: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules </span>  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">that contain all ligands in the provided list.</span>                         <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                        <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">length_equal_to</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">length_equal_to</span><span style=\"font-weight: bold\">(</span>length: int, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>,   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">equal to a given length.</span>                                               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                     <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">length_longer_than</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">length_longer_than</span><span style=\"font-weight: bold\">(</span>length: int, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> =      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select </span>    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">molecules longer than a given length.</span>                                  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                    <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">length_shorter_than</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">length_shorter_than</span><span style=\"font-weight: bold\">(</span>length: int, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> =     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select </span>    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">molecules shorter than a given length.</span>                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                        <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">merge_df_splits</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">merge_df_splits</span><span style=\"font-weight: bold\">(</span>first_df_split: pandas.core.frame.DataFrame,       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          second_df_split: pandas.core.frame.DataFrame, split: str<span style=\"font-weight: bold\">)</span> -&gt;           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Reconcile an existing DataFrame split </span>    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">with a new split.</span>                                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                 <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">merge_pdb_chain_groups</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">merge_pdb_chain_groups</span><span style=\"font-weight: bold\">(</span>group:                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.groupby.generic.DataFrameGroupBy<span style=\"font-weight: bold\">)</span> -&gt;                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Combine groups of chains associated with </span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">the same PDB code.</span>                                                     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                          <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">molecule_type</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">molecule_type</span><span style=\"font-weight: bold\">(</span>type: str = <span style=\"color: #008000; text-decoration-color: #008000\">'protein'</span>, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select </span>    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">molecules by molecule type. </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f; font-weight: bold\">[</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">`protein`, `dna`, `rna`</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f; font-weight: bold\">]</span>                  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                             <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">oligomeric</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">oligomeric</span><span style=\"font-weight: bold\">(</span>oligomer: int = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>, comparison: str = <span style=\"color: #008000; text-decoration-color: #008000\">'equal'</span>, splits:   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt;                   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame:                                           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules with a given oligomeric length.</span>                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">I.e. ``df.n_chains ==</span><span style=\"color: #bf7fbf; text-decoration-color: #bf7fbf\">/</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f; font-weight: bold\">&lt;</span><span style=\"color: #bf7fbf; text-decoration-color: #bf7fbf\">/</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f; font-weight: bold\">&gt;</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">  oligomer``</span>                                  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                  <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">parse</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">parse</span><span style=\"font-weight: bold\">()</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Parse all PDB sequence </span>    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">records.</span>                                                               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span> <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">remove_non_standard_alphabet_sequences</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">remove_non_standard_alphabet_sequences</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Remove sequences with non-standard </span>     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">characters.</span>                                                            <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">remove_unavailable_pdbs</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">remove_unavailable_pdbs</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>,        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame:                  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Removes PDB files that are not available for download from the</span>         <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">selection.</span>                                                             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                  <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">reset</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">reset</span><span style=\"font-weight: bold\">()</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Reset the dataset to the </span>  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">original DataFrame source.</span>                                             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>     <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">resolution_better_than_or_equal_to</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">resolution_better_than_or_equal_to</span><span style=\"font-weight: bold\">(</span>resolution: float, splits:      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt;                   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules with a resolution better</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">than or equal to the given value.</span>                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>      <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">resolution_worse_than_or_equal_to</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">resolution_worse_than_or_equal_to</span><span style=\"font-weight: bold\">(</span>resolution: int, splits:         <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, update: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt;                   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Select molecules with a resolution worse </span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">than or equal to the given value.</span>                                      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                 <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">sample</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">sample</span><span style=\"font-weight: bold\">(</span>n: Optional<span style=\"font-weight: bold\">[</span>int<span style=\"font-weight: bold\">]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, frac: Optional<span style=\"font-weight: bold\">[</span>float<span style=\"font-weight: bold\">]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>,      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, replace: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span>, update:     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; pandas.core.frame.DataFrame: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Sample a subset of the </span>  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">dataset.</span>                                                               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">select_pdb_by_criterion</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">select_pdb_by_criterion</span><span style=\"font-weight: bold\">(</span>pdb: biopandas.pdb.pandas_pdb.PandasPdb,   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          field: str, field_values: List<span style=\"font-weight: bold\">[</span>Any<span style=\"font-weight: bold\">])</span> -&gt;                                <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          biopandas.pdb.pandas_pdb.PandasPdb: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Filter a PDB using a field </span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">selection.</span>                                                             <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>               <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">split_by_deposition_date</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">split_by_deposition_date</span><span style=\"font-weight: bold\">(</span>df: pandas.core.frame.DataFrame, update:  <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; Dict<span style=\"font-weight: bold\">[</span>str, pandas.core.frame.DataFrame<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Split </span>        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">molecules based on their deposition date.</span>                              <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                         <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">split_clusters</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">split_clusters</span><span style=\"font-weight: bold\">(</span>df: pandas.core.frame.DataFrame, update: bool =     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; Dict<span style=\"font-weight: bold\">[</span>str, pandas.core.frame.DataFrame<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Split clusters </span>      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">derived by MMseqs2.</span>                                                    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>              <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">split_df_into_time_frames</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">split_df_into_time_frames</span><span style=\"font-weight: bold\">(</span>df: pandas.core.frame.DataFrame, splits: <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span>, split_time_frames: List<span style=\"font-weight: bold\">[</span>numpy.datetime64<span style=\"font-weight: bold\">])</span> -&gt; Dict<span style=\"font-weight: bold\">[</span>str,     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Split the provided DataFrame </span>            <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">sequentially according to given time frames.</span>                           <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">split_df_proportionally</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">split_df_proportionally</span><span style=\"font-weight: bold\">(</span>df: pandas.core.frame.DataFrame, splits:   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]</span>, split_ratios: List<span style=\"font-weight: bold\">[</span>float<span style=\"font-weight: bold\">]</span>, assign_leftover_rows_to_split_n: <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          int = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0</span>, random_state: int = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">42</span><span style=\"font-weight: bold\">)</span> -&gt; Dict<span style=\"font-weight: bold\">[</span>str,                          <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pandas.core.frame.DataFrame<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Split the provided DataFrame iteratively</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">according to given proportions.</span>                                        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>         <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">to_chain_sequence_mapping_dict</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">to_chain_sequence_mapping_dict</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]])</span> -&gt;     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          Dict<span style=\"font-weight: bold\">[</span>str, str<span style=\"font-weight: bold\">]</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Return a dictionary of sequences indexed by chains.</span>    <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                 <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">to_csv</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">to_csv</span><span style=\"font-weight: bold\">(</span>fname: str, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Write the </span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">selection to a CSV file.</span>                                               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                               <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">to_fasta</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">to_fasta</span><span style=\"font-weight: bold\">(</span>filename: str, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span><span style=\"font-weight: bold\">)</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Write</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">the dataset to a FASTA file </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f; font-weight: bold\">(</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">indexed by chain id</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f; font-weight: bold\">)</span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">.</span>                     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                           <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">write_chains</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">write_chains</span><span style=\"font-weight: bold\">(</span>splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>, models: List<span style=\"font-weight: bold\">[</span>int<span style=\"font-weight: bold\">]</span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          = <span style=\"font-weight: bold\">[</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span><span style=\"font-weight: bold\">]</span>, force: bool = <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-style: italic\">False</span><span style=\"font-weight: bold\">)</span> -&gt; List<span style=\"font-weight: bold\">[</span>pathlib.Path<span style=\"font-weight: bold\">]</span>:                     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Write chains in current selection to disk. e.g., we create a file</span>      <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">of the form ``4hbb_A.pdb`` for chain ``A`` of PDB file ``4hhb.pdb``.</span>   <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                          <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">write_df_pdbs</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">write_df_pdbs</span><span style=\"font-weight: bold\">(</span>pdb_dir: str, df: pandas.core.frame.DataFrame,       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          out_dir: str = <span style=\"color: #008000; text-decoration-color: #008000\">'collated_pdb'</span>, splits: Optional<span style=\"font-weight: bold\">[</span>List<span style=\"font-weight: bold\">[</span>str<span style=\"font-weight: bold\">]]</span> = <span style=\"color: #800080; text-decoration-color: #800080; font-style: italic\">None</span>,     <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          max_num_chains_per_pdb_code: int = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span><span style=\"font-weight: bold\">)</span>: <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Write the given selection as a </span> <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">collection of PDB files.</span>                                               <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>             <span style=\"color: #808000; text-decoration-color: #808000; font-style: italic\">write_out_pdb_chain_groups</span> = <span style=\"color: #00ffff; text-decoration-color: #00ffff; font-style: italic\">def </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">write_out_pdb_chain_groups</span><span style=\"font-weight: bold\">(</span>df: pandas.core.frame.DataFrame,        <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          pdb_dir: str, out_dir: str, split: str, merge_fn: Callable,            <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          max_num_chains_per_pdb_code: int = <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span><span style=\"font-weight: bold\">)</span>:                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">Record groups of PDB codes and associated chains</span>                       <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">│</span>                                          <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">as collated PDB files.</span>                                                 <span style=\"color: #000080; text-decoration-color: #000080\">│</span>\n",
       "<span style=\"color: #000080; text-decoration-color: #000080\">╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[34m╭─\u001b[0m\u001b[34m─────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34m<\u001b[0m\u001b[1;95mclass\u001b[0m\u001b[39m \u001b[0m\u001b[32m'graphein.ml.datasets.pdb_data.PDBManager'\u001b[0m\u001b[1;34m>\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n",
       "\u001b[34m│\u001b[0m \u001b[36mA utility for creating selections of experimental PDB structures.\u001b[0m                                               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m \u001b[32m╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m \u001b[32m│\u001b[0m \u001b[1m<\u001b[0m\u001b[1;95mgraphein.ml.datasets.pdb_data.PDBManager\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x7f63a02e5190\u001b[0m\u001b[1m>\u001b[0m                                         \u001b[32m│\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m \u001b[32m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                     \u001b[3;33mdf\u001b[0m =             id   pdb chain  length molecule_type  \\                    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m       100d_A  100d     A      \u001b[1;36m10\u001b[0m            na                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m       100d_B  100d     B      \u001b[1;36m10\u001b[0m            na                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m       101d_A  101d     A      \u001b[1;36m12\u001b[0m            na                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m       101d_B  101d     B      \u001b[1;36m12\u001b[0m            na                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m       101m_A  101m     A     \u001b[1;36m154\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m        \u001b[33m...\u001b[0m   \u001b[33m...\u001b[0m   \u001b[33m...\u001b[0m     \u001b[33m...\u001b[0m           \u001b[33m...\u001b[0m                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m  9xia_A  9xia     A     \u001b[1;36m388\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m  9xim_A  9xim     A     \u001b[1;36m393\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m  9xim_B  9xim     B     \u001b[1;36m393\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m  9xim_C  9xim     C     \u001b[1;36m393\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m  9xim_D  9xim     D     \u001b[1;36m393\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                               name  \\           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m       DNA/RNA \u001b[1m(\u001b[0m\u001b[1;36m5\u001b[0m'-\u001b[1;35mR\u001b[0m\u001b[1m(\u001b[0m*CP*\u001b[1m)\u001b[0m-\u001b[1;35mD\u001b[0m\u001b[1m(\u001b[0m*CP*GP*GP*CP*GP*CP*CP*GP\u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m       DNA/RNA \u001b[1m(\u001b[0m\u001b[1;36m5\u001b[0m'-\u001b[1;35mR\u001b[0m\u001b[1m(\u001b[0m*CP*\u001b[1m)\u001b[0m-\u001b[1;35mD\u001b[0m\u001b[1m(\u001b[0m*CP*GP*GP*CP*GP*CP*CP*GP\u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m       DNA \u001b[1m(\u001b[0m\u001b[1;36m5\u001b[0m'-\u001b[1;35mD\u001b[0m\u001b[1m(\u001b[0m*CP*GP*CP*GP*AP*AP*TP*TP*\u001b[1m(\u001b[0mCBR\u001b[1m)\u001b[0mP*GP*C\u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m       DNA \u001b[1m(\u001b[0m\u001b[1;36m5\u001b[0m'-\u001b[1;35mD\u001b[0m\u001b[1m(\u001b[0m*CP*GP*CP*GP*AP*AP*TP*TP*\u001b[1m(\u001b[0mCBR\u001b[1m)\u001b[0mP*GP*C\u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                               MYOGLOBIN              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                                                   \u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m                                   XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m                                 D-XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m                                 D-XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m                                 D-XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m                                 D-XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                           sequence split        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          n_chains  \\                                                            \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m                                              CCGGCGCCGG   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m                                              CCGGCGCCGG   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                            CGCGAATTCGCG   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m                                            CGCGAATTCGCG   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m       MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                                                   \u001b[33m...\u001b[0m   \u001b[33m...\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                                                                    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m  MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                          ligands                      source  resolution  \\     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m                 \u001b[1m[\u001b[0mSPM\u001b[1m]\u001b[0m                                    \u001b[1;36m1.90\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m                 \u001b[1m[\u001b[0mSPM\u001b[1m]\u001b[0m                                    \u001b[1;36m1.90\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m         \u001b[1m[\u001b[0mCBR, MG, NT\u001b[1m]\u001b[0m                                    \u001b[1;36m2.25\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m         \u001b[1m[\u001b[0mCBR, MG, NT\u001b[1m]\u001b[0m                                    \u001b[1;36m2.25\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m       \u001b[1m[\u001b[0mHEM, NBN, SO4\u001b[1m]\u001b[0m            Physeter catodon        \u001b[1;36m2.07\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                 \u001b[33m...\u001b[0m                         \u001b[33m...\u001b[0m         \u001b[33m...\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m        \u001b[1m[\u001b[0mDFR, MN\u001b[1m]\u001b[0m    Streptomyces rubiginosus        \u001b[1;36m1.90\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m        \u001b[1m[\u001b[0mMN, XLS\u001b[1m]\u001b[0m  Actinoplanes missouriensis        \u001b[1;36m2.40\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m        \u001b[1m[\u001b[0mMN, XLS\u001b[1m]\u001b[0m  Actinoplanes missouriensis        \u001b[1;36m2.40\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m        \u001b[1m[\u001b[0mMN, XLS\u001b[1m]\u001b[0m  Actinoplanes missouriensis        \u001b[1;36m2.40\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m        \u001b[1m[\u001b[0mMN, XLS\u001b[1m]\u001b[0m  Actinoplanes missouriensis        \u001b[1;36m2.40\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                 deposition_date experiment_type  pdb_file_available             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m           \u001b[1;36m1994\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m05\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m           \u001b[1;36m1994\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m05\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m           \u001b[1;36m1994\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m14\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m           \u001b[1;36m1994\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m14\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m           \u001b[1;36m1997\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m13\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                \u001b[33m...\u001b[0m             \u001b[33m...\u001b[0m                 \u001b[33m...\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m      \u001b[1;36m1990\u001b[0m-\u001b[1;36m10\u001b[0m-\u001b[1;36m11\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m      \u001b[1;36m1992\u001b[0m-\u001b[1;36m04\u001b[0m-\u001b[1;36m03\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m      \u001b[1;36m1992\u001b[0m-\u001b[1;36m04\u001b[0m-\u001b[1;36m03\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m      \u001b[1;36m1992\u001b[0m-\u001b[1;36m04\u001b[0m-\u001b[1;36m03\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m      \u001b[1;36m1992\u001b[0m-\u001b[1;36m04\u001b[0m-\u001b[1;36m03\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1m[\u001b[0m\u001b[1;36m780162\u001b[0m rows x \u001b[1;36m15\u001b[0m columns\u001b[1m]\u001b[0m                                             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                    \u001b[3;33mligand_map_filename\u001b[0m = \u001b[32m'cc-to-pdb.tdd'\u001b[0m                                                        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                         \u001b[3;33mligand_map_url\u001b[0m = \u001b[32m'http://ligand-expo.rcsb.org/dictionaries/cc-to-pdb.tdd'\u001b[0m               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                           \u001b[3;33mlist_columns\u001b[0m = \u001b[1m[\u001b[0m\u001b[32m'ligands'\u001b[0m\u001b[1m]\u001b[0m                                                            \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m              \u001b[3;33mpdb_availability_filename\u001b[0m = \u001b[32m'pdb_bundle_index.txt'\u001b[0m                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                   \u001b[3;33mpdb_availability_url\u001b[0m = \u001b[32m'https://files.wwpdb.org/pub/pdb/compatible/pdb_bundle/pdb_bundle_ind…\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m           \u001b[3;33mpdb_deposition_date_filename\u001b[0m = \u001b[32m'entries.idx'\u001b[0m                                                          \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                \u001b[3;33mpdb_deposition_date_url\u001b[0m = \u001b[32m'https://files.wwpdb.org/pub/pdb/derived_data/index/entries.idx'\u001b[0m       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                \u001b[3;33mpdb_dir\u001b[0m = \u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'pdb'\u001b[0m\u001b[1m)\u001b[0m                                                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                \u001b[3;33mpdb_entry_type_filename\u001b[0m = \u001b[32m'pdb_entry_type.txt'\u001b[0m                                                   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                     \u001b[3;33mpdb_entry_type_url\u001b[0m = \u001b[32m'https://files.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt'\u001b[0m      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m            \u001b[3;33mpdb_seqres_archive_filename\u001b[0m = \u001b[32m'pdb_seqres.txt.gz'\u001b[0m                                                    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                    \u001b[3;33mpdb_seqres_filename\u001b[0m = \u001b[32m'pdb_seqres.txt'\u001b[0m                                                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                      \u001b[3;33mpdb_sequences_url\u001b[0m = \u001b[32m'https://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt.gz'\u001b[0m         \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                    \u001b[3;33mresolution_filename\u001b[0m = \u001b[32m'resolu.idx'\u001b[0m                                                           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                         \u001b[3;33mresolution_url\u001b[0m = \u001b[32m'https://files.wwpdb.org/pub/pdb/derived_data/index/resolu.idx'\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                               \u001b[3;33mroot_dir\u001b[0m = \u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'.'\u001b[0m\u001b[1m)\u001b[0m                                                         \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                 \u001b[3;33msource\u001b[0m =             id   pdb chain  length molecule_type  \\                    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m       100d_A  100d     A      \u001b[1;36m10\u001b[0m            na                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m       100d_B  100d     B      \u001b[1;36m10\u001b[0m            na                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m       101d_A  101d     A      \u001b[1;36m12\u001b[0m            na                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m       101d_B  101d     B      \u001b[1;36m12\u001b[0m            na                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m       101m_A  101m     A     \u001b[1;36m154\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m        \u001b[33m...\u001b[0m   \u001b[33m...\u001b[0m   \u001b[33m...\u001b[0m     \u001b[33m...\u001b[0m           \u001b[33m...\u001b[0m                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m  9xia_A  9xia     A     \u001b[1;36m388\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m  9xim_A  9xim     A     \u001b[1;36m393\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m  9xim_B  9xim     B     \u001b[1;36m393\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m  9xim_C  9xim     C     \u001b[1;36m393\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m  9xim_D  9xim     D     \u001b[1;36m393\u001b[0m       protein                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                               name  \\           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m       DNA/RNA \u001b[1m(\u001b[0m\u001b[1;36m5\u001b[0m'-\u001b[1;35mR\u001b[0m\u001b[1m(\u001b[0m*CP*\u001b[1m)\u001b[0m-\u001b[1;35mD\u001b[0m\u001b[1m(\u001b[0m*CP*GP*GP*CP*GP*CP*CP*GP\u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m       DNA/RNA \u001b[1m(\u001b[0m\u001b[1;36m5\u001b[0m'-\u001b[1;35mR\u001b[0m\u001b[1m(\u001b[0m*CP*\u001b[1m)\u001b[0m-\u001b[1;35mD\u001b[0m\u001b[1m(\u001b[0m*CP*GP*GP*CP*GP*CP*CP*GP\u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m       DNA \u001b[1m(\u001b[0m\u001b[1;36m5\u001b[0m'-\u001b[1;35mD\u001b[0m\u001b[1m(\u001b[0m*CP*GP*CP*GP*AP*AP*TP*TP*\u001b[1m(\u001b[0mCBR\u001b[1m)\u001b[0mP*GP*C\u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m       DNA \u001b[1m(\u001b[0m\u001b[1;36m5\u001b[0m'-\u001b[1;35mD\u001b[0m\u001b[1m(\u001b[0m*CP*GP*CP*GP*AP*AP*TP*TP*\u001b[1m(\u001b[0mCBR\u001b[1m)\u001b[0mP*GP*C\u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                               MYOGLOBIN              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                                                   \u001b[33m...\u001b[0m              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m                                   XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m                                 D-XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m                                 D-XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m                                 D-XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m                                 D-XYLOSE ISOMERASE              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                           sequence split        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          n_chains  \\                                                            \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m                                              CCGGCGCCGG   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m                                              CCGGCGCCGG   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                            CGCGAATTCGCG   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m                                            CGCGAATTCGCG   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m       MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                                                   \u001b[33m...\u001b[0m   \u001b[33m...\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                                                                    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m  MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG\u001b[33m...\u001b[0m   N/A        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m                                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                          ligands                      source  resolution  \\     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m                 \u001b[1m[\u001b[0mSPM\u001b[1m]\u001b[0m                                    \u001b[1;36m1.90\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m                 \u001b[1m[\u001b[0mSPM\u001b[1m]\u001b[0m                                    \u001b[1;36m1.90\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m         \u001b[1m[\u001b[0mCBR, MG, NT\u001b[1m]\u001b[0m                                    \u001b[1;36m2.25\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m         \u001b[1m[\u001b[0mCBR, MG, NT\u001b[1m]\u001b[0m                                    \u001b[1;36m2.25\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m       \u001b[1m[\u001b[0mHEM, NBN, SO4\u001b[1m]\u001b[0m            Physeter catodon        \u001b[1;36m2.07\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                 \u001b[33m...\u001b[0m                         \u001b[33m...\u001b[0m         \u001b[33m...\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m        \u001b[1m[\u001b[0mDFR, MN\u001b[1m]\u001b[0m    Streptomyces rubiginosus        \u001b[1;36m1.90\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m        \u001b[1m[\u001b[0mMN, XLS\u001b[1m]\u001b[0m  Actinoplanes missouriensis        \u001b[1;36m2.40\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m        \u001b[1m[\u001b[0mMN, XLS\u001b[1m]\u001b[0m  Actinoplanes missouriensis        \u001b[1;36m2.40\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m        \u001b[1m[\u001b[0mMN, XLS\u001b[1m]\u001b[0m  Actinoplanes missouriensis        \u001b[1;36m2.40\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m        \u001b[1m[\u001b[0mMN, XLS\u001b[1m]\u001b[0m  Actinoplanes missouriensis        \u001b[1;36m2.40\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                 deposition_date experiment_type  pdb_file_available             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m0\u001b[0m           \u001b[1;36m1994\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m05\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m1\u001b[0m           \u001b[1;36m1994\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m05\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m2\u001b[0m           \u001b[1;36m1994\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m14\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m3\u001b[0m           \u001b[1;36m1994\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m14\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m4\u001b[0m           \u001b[1;36m1997\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m13\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[33m...\u001b[0m                \u001b[33m...\u001b[0m             \u001b[33m...\u001b[0m                 \u001b[33m...\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780157\u001b[0m      \u001b[1;36m1990\u001b[0m-\u001b[1;36m10\u001b[0m-\u001b[1;36m11\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780158\u001b[0m      \u001b[1;36m1992\u001b[0m-\u001b[1;36m04\u001b[0m-\u001b[1;36m03\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780159\u001b[0m      \u001b[1;36m1992\u001b[0m-\u001b[1;36m04\u001b[0m-\u001b[1;36m03\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780160\u001b[0m      \u001b[1;36m1992\u001b[0m-\u001b[1;36m04\u001b[0m-\u001b[1;36m03\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;36m780161\u001b[0m      \u001b[1;36m1992\u001b[0m-\u001b[1;36m04\u001b[0m-\u001b[1;36m03\u001b[0m     diffraction                \u001b[3;92mTrue\u001b[0m             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                                                                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1m[\u001b[0m\u001b[1;36m780162\u001b[0m rows x \u001b[1;36m15\u001b[0m columns\u001b[1m]\u001b[0m                                             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                    \u001b[3;33msource_map_filename\u001b[0m = \u001b[32m'source.idx'\u001b[0m                                                           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                         \u001b[3;33msource_map_url\u001b[0m = \u001b[32m'https://files.wwpdb.org/pub/pdb/derived_data/index/source.idx'\u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                        \u001b[3;33msplits_provided\u001b[0m = \u001b[3;91mFalse\u001b[0m                                                                  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                \u001b[3;33mcluster\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mcluster\u001b[0m\u001b[1m(\u001b[0mmin_seq_id: float = \u001b[1;36m0.3\u001b[0m, coverage: float = \u001b[1;36m0.8\u001b[0m, update:    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          bool = \u001b[3;91mFalse\u001b[0m, fasta_fname: Optional\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, cluster_fname:        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Optional\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, overwrite: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m ->                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Union\u001b[1m[\u001b[0mpandas.core.frame.DataFrame, Dict\u001b[1m[\u001b[0mstr,                           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m: \u001b[2mCluster sequences in selection using \u001b[0m   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mMMseqs2.\u001b[0m                                                               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                         \u001b[3;33mcompare_length\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mcompare_length\u001b[0m\u001b[1m(\u001b[0mlength: int, comparison: str = \u001b[32m'equal'\u001b[0m,             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          compare_pdb_groups: bool = \u001b[3;91mFalse\u001b[0m, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m,  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mSelect molecules with a given length.\u001b[0m           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                      \u001b[3;33mdownload_metadata\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mdownload_metadata\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mDownload all PDB metadata.\u001b[0m                    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                          \u001b[3;33mdownload_pdbs\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mdownload_pdbs\u001b[0m\u001b[1m(\u001b[0m\u001b[33mout_dir\u001b[0m=\u001b[32m'.'\u001b[0m, \u001b[33mformat\u001b[0m=\u001b[32m'pdb'\u001b[0m, splits:                   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, overwrite: bool = \u001b[3;91mFalse\u001b[0m, max_workers: int  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          = \u001b[1;36m8\u001b[0m, chunksize: int = \u001b[1;36m32\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mDownload PDB files in the current \u001b[0m          \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mselection.\u001b[0m                                                             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                        \u001b[3;33mexperiment_type\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mexperiment_type\u001b[0m\u001b[1m(\u001b[0mtype: str = \u001b[32m'diffraction'\u001b[0m, splits:                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m ->                   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame: \u001b[2mSelect molecules by experiment type. \u001b[0m     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[1;2m[\u001b[0m\u001b[2m`diffraction`, `NMR`, `EM`, `other`\u001b[0m\u001b[1;2m]\u001b[0m                                  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                            \u001b[3;33mexport_pdbs\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mexport_pdbs\u001b[0m\u001b[1m(\u001b[0mpdb_dir: str, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m,      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          max_num_chains_per_pdb_code: int = \u001b[1;36m1\u001b[0m, force: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mWrite the \u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mselection as a collection of PDB files.\u001b[0m                                \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m              \u001b[3;33mfilter_by_deposition_date\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mfilter_by_deposition_date\u001b[0m\u001b[1m(\u001b[0mmax_deposition_date: numpy.datetime64,   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> Union\u001b[1m[\u001b[0mpandas.core.frame.DataFrame, Dict\u001b[1m[\u001b[0mstr,  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m: \u001b[2mSelect molecules deposited on or before\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2ma given date.\u001b[0m                                                          \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                             \u001b[3;33mfrom_fasta\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mfrom_fasta\u001b[0m\u001b[1m(\u001b[0mids: str, filename: str, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m =  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mCreate a selection from a FASTA \u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mfile.\u001b[0m                                                                  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                    \u001b[3;33mget_best_resolution\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_best_resolution\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m -> float:  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mReturn the best resolution in the dataset.\u001b[0m                             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                   \u001b[3;33mget_experiment_types\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_experiment_types\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m ->        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          List\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m: \u001b[2mReturn list of different experiment types in the dataset.\u001b[0m   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                      \u001b[3;33mget_longest_chain\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_longest_chain\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m -> int:      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mReturn the length of the longest chain in the dataset.\u001b[0m                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                     \u001b[3;33mget_molecule_names\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_molecule_names\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m ->          \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          List\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m: \u001b[2mReturn list of molecule names in the dataset.\u001b[0m               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                     \u001b[3;33mget_molecule_types\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_molecule_types\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m ->          \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          List\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m: \u001b[2mReturn list of different molecule types in the dataset.\u001b[0m     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                         \u001b[3;33mget_num_chains\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_num_chains\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m -> int: \u001b[2mReturn \u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mthe number of chains in the dataset.\u001b[0m                                   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                    \u001b[3;33mget_num_unique_pdbs\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_num_unique_pdbs\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m -> int:    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mReturn the number of unique PDB IDs in the dataset.\u001b[0m                    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                     \u001b[3;33mget_shortest_chain\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_shortest_chain\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m -> int:     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mReturn the length of the shortest chain in the dataset.\u001b[0m                \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                             \u001b[3;33mget_splits\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_splits\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, df_splits:          \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Optional\u001b[1m[\u001b[0mDict\u001b[1m[\u001b[0mstr, pandas.core.frame.DataFrame\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, source: bool  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mReturn DataFrame entries \u001b[0m     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mbelonging to the splits given.\u001b[0m                                         \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m              \u001b[3;33mget_unavailable_pdb_files\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_unavailable_pdb_files\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m ->   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          List\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m: \u001b[2mReturns a list of PDB files unavailable for download.\u001b[0m       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                        \u001b[3;33mget_unique_pdbs\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_unique_pdbs\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m -> List\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m:  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mReturn a list of unique PDB IDs in the dataset.\u001b[0m                        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                   \u001b[3;33mget_worst_resolution\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mget_worst_resolution\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m -> float: \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mReturn the worst resolution in the dataset.\u001b[0m                            \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                             \u001b[3;33mhas_ligand\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mhas_ligand\u001b[0m\u001b[1m(\u001b[0mligand: str, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m,        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mSelect molecules\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mthat contain a given ligand.\u001b[0m                                           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                            \u001b[3;33mhas_ligands\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mhas_ligands\u001b[0m\u001b[1m(\u001b[0mligands: List\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m =      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[3;35mNone\u001b[0m, inverse: bool = \u001b[3;91mFalse\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mSelect molecules \u001b[0m  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mthat contain all ligands in the provided list.\u001b[0m                         \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                        \u001b[3;33mlength_equal_to\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mlength_equal_to\u001b[0m\u001b[1m(\u001b[0mlength: int, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m,   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mSelect molecules\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mequal to a given length.\u001b[0m                                               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                     \u001b[3;33mlength_longer_than\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mlength_longer_than\u001b[0m\u001b[1m(\u001b[0mlength: int, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m =      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[3;35mNone\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mSelect \u001b[0m    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mmolecules longer than a given length.\u001b[0m                                  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                    \u001b[3;33mlength_shorter_than\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mlength_shorter_than\u001b[0m\u001b[1m(\u001b[0mlength: int, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m =     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[3;35mNone\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mSelect \u001b[0m    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mmolecules shorter than a given length.\u001b[0m                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                        \u001b[3;33mmerge_df_splits\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mmerge_df_splits\u001b[0m\u001b[1m(\u001b[0mfirst_df_split: pandas.core.frame.DataFrame,       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          second_df_split: pandas.core.frame.DataFrame, split: str\u001b[1m)\u001b[0m ->           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame: \u001b[2mReconcile an existing DataFrame split \u001b[0m    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mwith a new split.\u001b[0m                                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                 \u001b[3;33mmerge_pdb_chain_groups\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mmerge_pdb_chain_groups\u001b[0m\u001b[1m(\u001b[0mgroup:                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.groupby.generic.DataFrameGroupBy\u001b[1m)\u001b[0m ->                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame: \u001b[2mCombine groups of chains associated with \u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mthe same PDB code.\u001b[0m                                                     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                          \u001b[3;33mmolecule_type\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mmolecule_type\u001b[0m\u001b[1m(\u001b[0mtype: str = \u001b[32m'protein'\u001b[0m, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[3;35mNone\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mSelect \u001b[0m    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mmolecules by molecule type. \u001b[0m\u001b[1;2m[\u001b[0m\u001b[2m`protein`, `dna`, `rna`\u001b[0m\u001b[1;2m]\u001b[0m                  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                             \u001b[3;33moligomeric\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31moligomeric\u001b[0m\u001b[1m(\u001b[0moligomer: int = \u001b[1;36m1\u001b[0m, comparison: str = \u001b[32m'equal'\u001b[0m, splits:   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m ->                   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame:                                           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mSelect molecules with a given oligmeric length.\u001b[0m                        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mI.e. ``df.n_chains ==\u001b[0m\u001b[2;35m/\u001b[0m\u001b[1;2m<\u001b[0m\u001b[2;35m/\u001b[0m\u001b[1;2m>\u001b[0m\u001b[2m  oligomer``\u001b[0m                                  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                  \u001b[3;33mparse\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mparse\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mParse all PDB sequence \u001b[0m    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mrecords.\u001b[0m                                                               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m \u001b[3;33mremove_non_standard_alphabet_sequences\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mremove_non_standard_alphabet_sequences\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          = \u001b[3;35mNone\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mRemove sequences with non-standard \u001b[0m     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mcharacters.\u001b[0m                                                            \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                \u001b[3;33mremove_unavailable_pdbs\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mremove_unavailable_pdbs\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m,        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame:                  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mRemoves PDB files that are not available for download from the\u001b[0m         \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mselection.\u001b[0m                                                             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                  \u001b[3;33mreset\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mreset\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mReset the dataset to the \u001b[0m  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2moriginal DataFrame source.\u001b[0m                                             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m     \u001b[3;33mresolution_better_than_or_equal_to\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mresolution_better_than_or_equal_to\u001b[0m\u001b[1m(\u001b[0mresolution: float, splits:      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m ->                   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame: \u001b[2mSelect molecules with a resolution better\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mthan or equal to the given value.\u001b[0m                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m      \u001b[3;33mresolution_worse_than_or_equal_to\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mresolution_worse_than_or_equal_to\u001b[0m\u001b[1m(\u001b[0mresolution: int, splits:         \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, update: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m ->                   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame: \u001b[2mSelect molecules with a resolution worse \u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mthan or equal to the given value.\u001b[0m                                      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                 \u001b[3;33msample\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31msample\u001b[0m\u001b[1m(\u001b[0mn: Optional\u001b[1m[\u001b[0mint\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, frac: Optional\u001b[1m[\u001b[0mfloat\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m,      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, replace: bool = \u001b[3;91mFalse\u001b[0m, update:     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> pandas.core.frame.DataFrame: \u001b[2mSample a subset of the \u001b[0m  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mdataset.\u001b[0m                                                               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                \u001b[3;33mselect_pdb_by_criterion\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mselect_pdb_by_criterion\u001b[0m\u001b[1m(\u001b[0mpdb: biopandas.pdb.pandas_pdb.PandasPdb,   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          field: str, field_values: List\u001b[1m[\u001b[0mAny\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m ->                                \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          biopandas.pdb.pandas_pdb.PandasPdb: \u001b[2mFilter a PDB using a field \u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mselection.\u001b[0m                                                             \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m               \u001b[3;33msplit_by_deposition_date\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31msplit_by_deposition_date\u001b[0m\u001b[1m(\u001b[0mdf: pandas.core.frame.DataFrame, update:  \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> Dict\u001b[1m[\u001b[0mstr, pandas.core.frame.DataFrame\u001b[1m]\u001b[0m: \u001b[2mSplit \u001b[0m        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mmolecules based on their deposition date.\u001b[0m                              \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                         \u001b[3;33msplit_clusters\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31msplit_clusters\u001b[0m\u001b[1m(\u001b[0mdf: pandas.core.frame.DataFrame, update: bool =     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> Dict\u001b[1m[\u001b[0mstr, pandas.core.frame.DataFrame\u001b[1m]\u001b[0m: \u001b[2mSplit clusters \u001b[0m      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mderived by MMseqs2.\u001b[0m                                                    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m              \u001b[3;33msplit_df_into_time_frames\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31msplit_df_into_time_frames\u001b[0m\u001b[1m(\u001b[0mdf: pandas.core.frame.DataFrame, splits: \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          List\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m, split_time_frames: List\u001b[1m[\u001b[0mnumpy.datetime64\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m -> Dict\u001b[1m[\u001b[0mstr,     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame\u001b[1m]\u001b[0m: \u001b[2mSplit the provided DataFrame \u001b[0m            \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2msequentially according to given time frames.\u001b[0m                           \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                \u001b[3;33msplit_df_proportionally\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31msplit_df_proportionally\u001b[0m\u001b[1m(\u001b[0mdf: pandas.core.frame.DataFrame, splits:   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          List\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m, split_ratios: List\u001b[1m[\u001b[0mfloat\u001b[1m]\u001b[0m, assign_leftover_rows_to_split_n: \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          int = \u001b[1;36m0\u001b[0m, random_state: int = \u001b[1;36m42\u001b[0m\u001b[1m)\u001b[0m -> Dict\u001b[1m[\u001b[0mstr,                          \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pandas.core.frame.DataFrame\u001b[1m]\u001b[0m: \u001b[2mSplit the provided DataFrame iteratively\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2maccording to given proportions.\u001b[0m                                        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m         \u001b[3;33mto_chain_sequence_mapping_dict\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mto_chain_sequence_mapping_dict\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m ->     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          Dict\u001b[1m[\u001b[0mstr, str\u001b[1m]\u001b[0m: \u001b[2mReturn a dictionary of sequences indexed by chains.\u001b[0m    \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                 \u001b[3;33mto_csv\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mto_csv\u001b[0m\u001b[1m(\u001b[0mfname: str, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mWrite the \u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mselection to a CSV file.\u001b[0m                                               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                               \u001b[3;33mto_fasta\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mto_fasta\u001b[0m\u001b[1m(\u001b[0mfilename: str, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mWrite\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mthe dataset to a FASTA file \u001b[0m\u001b[1;2m(\u001b[0m\u001b[2mindexed by chain id\u001b[0m\u001b[1;2m)\u001b[0m\u001b[2m.\u001b[0m                     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                           \u001b[3;33mwrite_chains\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mwrite_chains\u001b[0m\u001b[1m(\u001b[0msplits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m, models: List\u001b[1m[\u001b[0mint\u001b[1m]\u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          = \u001b[1m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1m]\u001b[0m, force: bool = \u001b[3;91mFalse\u001b[0m\u001b[1m)\u001b[0m -> List\u001b[1m[\u001b[0mpathlib.Path\u001b[1m]\u001b[0m:                     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mWrite chains in current selection to disk. e.g., we create a file\u001b[0m      \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mof the form ``4hbb_A.pdb`` for chain ``A`` of PDB file ``4hhb.pdb``.\u001b[0m   \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                          \u001b[3;33mwrite_df_pdbs\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mwrite_df_pdbs\u001b[0m\u001b[1m(\u001b[0mpdb_dir: str, df: pandas.core.frame.DataFrame,       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          out_dir: str = \u001b[32m'collated_pdb'\u001b[0m, splits: Optional\u001b[1m[\u001b[0mList\u001b[1m[\u001b[0mstr\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m = \u001b[3;35mNone\u001b[0m,     \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          max_num_chains_per_pdb_code: int = \u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m: \u001b[2mWrite the given selection as a \u001b[0m \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mcollection of PDB files.\u001b[0m                                               \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m             \u001b[3;33mwrite_out_pdb_chain_groups\u001b[0m = \u001b[3;96mdef \u001b[0m\u001b[1;31mwrite_out_pdb_chain_groups\u001b[0m\u001b[1m(\u001b[0mdf: pandas.core.frame.DataFrame,        \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          pdb_dir: str, out_dir: str, split: str, merge_fn: Callable,            \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          max_num_chains_per_pdb_code: int = \u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m:                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mRecord groups of PDB codes and associated chains\u001b[0m                       \u001b[34m│\u001b[0m\n",
       "\u001b[34m│\u001b[0m                                          \u001b[2mas collated PDB files.\u001b[0m                                                 \u001b[34m│\u001b[0m\n",
       "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "!pip install graphein\n",
    "!pip install torch_geometric\n",
    "from rich import inspect\n",
    "from graphein.ml.datasets import PDBManager\n",
    "\n",
    "manager = PDBManager(root_dir=\".\")\n",
    "inspect(manager, methods=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The manager wraps two DataFrames:\n",
    "\n",
    "* `manager.df` - Your working selection.\n",
    "* `manager.source` - A clean copy of the original metadata."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100d_A</td>\n",
       "      <td>100d</td>\n",
       "      <td>A</td>\n",
       "      <td>10</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...</td>\n",
       "      <td>CCGGCGCCGG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[SPM]</td>\n",
       "      <td></td>\n",
       "      <td>1.90</td>\n",
       "      <td>1994-12-05</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100d_B</td>\n",
       "      <td>100d</td>\n",
       "      <td>B</td>\n",
       "      <td>10</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...</td>\n",
       "      <td>CCGGCGCCGG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[SPM]</td>\n",
       "      <td></td>\n",
       "      <td>1.90</td>\n",
       "      <td>1994-12-05</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>101d_A</td>\n",
       "      <td>101d</td>\n",
       "      <td>A</td>\n",
       "      <td>12</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...</td>\n",
       "      <td>CGCGAATTCGCG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[CBR, MG, NT]</td>\n",
       "      <td></td>\n",
       "      <td>2.25</td>\n",
       "      <td>1994-12-14</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>101d_B</td>\n",
       "      <td>101d</td>\n",
       "      <td>B</td>\n",
       "      <td>12</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...</td>\n",
       "      <td>CGCGAATTCGCG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[CBR, MG, NT]</td>\n",
       "      <td></td>\n",
       "      <td>2.25</td>\n",
       "      <td>1994-12-14</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>101m_A</td>\n",
       "      <td>101m</td>\n",
       "      <td>A</td>\n",
       "      <td>154</td>\n",
       "      <td>protein</td>\n",
       "      <td>MYOGLOBIN</td>\n",
       "      <td>MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[HEM, NBN, SO4]</td>\n",
       "      <td>Physeter catodon</td>\n",
       "      <td>2.07</td>\n",
       "      <td>1997-12-13</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780157</th>\n",
       "      <td>9xia_A</td>\n",
       "      <td>9xia</td>\n",
       "      <td>A</td>\n",
       "      <td>388</td>\n",
       "      <td>protein</td>\n",
       "      <td>XYLOSE ISOMERASE</td>\n",
       "      <td>MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[DFR, MN]</td>\n",
       "      <td>Streptomyces rubiginosus</td>\n",
       "      <td>1.90</td>\n",
       "      <td>1990-10-11</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780158</th>\n",
       "      <td>9xim_A</td>\n",
       "      <td>9xim</td>\n",
       "      <td>A</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780159</th>\n",
       "      <td>9xim_B</td>\n",
       "      <td>9xim</td>\n",
       "      <td>B</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780160</th>\n",
       "      <td>9xim_C</td>\n",
       "      <td>9xim</td>\n",
       "      <td>C</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780161</th>\n",
       "      <td>9xim_D</td>\n",
       "      <td>9xim</td>\n",
       "      <td>D</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>780162 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   pdb chain  length molecule_type  \\\n",
       "0       100d_A  100d     A      10            na   \n",
       "1       100d_B  100d     B      10            na   \n",
       "2       101d_A  101d     A      12            na   \n",
       "3       101d_B  101d     B      12            na   \n",
       "4       101m_A  101m     A     154       protein   \n",
       "...        ...   ...   ...     ...           ...   \n",
       "780157  9xia_A  9xia     A     388       protein   \n",
       "780158  9xim_A  9xim     A     393       protein   \n",
       "780159  9xim_B  9xim     B     393       protein   \n",
       "780160  9xim_C  9xim     C     393       protein   \n",
       "780161  9xim_D  9xim     D     393       protein   \n",
       "\n",
       "                                                     name  \\\n",
       "0       DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...   \n",
       "1       DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...   \n",
       "2       DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...   \n",
       "3       DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...   \n",
       "4                                               MYOGLOBIN   \n",
       "...                                                   ...   \n",
       "780157                                   XYLOSE ISOMERASE   \n",
       "780158                                 D-XYLOSE ISOMERASE   \n",
       "780159                                 D-XYLOSE ISOMERASE   \n",
       "780160                                 D-XYLOSE ISOMERASE   \n",
       "780161                                 D-XYLOSE ISOMERASE   \n",
       "\n",
       "                                                 sequence split  n_chains  \\\n",
       "0                                              CCGGCGCCGG   N/A         2   \n",
       "1                                              CCGGCGCCGG   N/A         2   \n",
       "2                                            CGCGAATTCGCG   N/A         2   \n",
       "3                                            CGCGAATTCGCG   N/A         2   \n",
       "4       MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR...   N/A         1   \n",
       "...                                                   ...   ...       ...   \n",
       "780157  MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL...   N/A         1   \n",
       "780158  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780159  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780160  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780161  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "\n",
       "                ligands                      source  resolution  \\\n",
       "0                 [SPM]                                    1.90   \n",
       "1                 [SPM]                                    1.90   \n",
       "2         [CBR, MG, NT]                                    2.25   \n",
       "3         [CBR, MG, NT]                                    2.25   \n",
       "4       [HEM, NBN, SO4]            Physeter catodon        2.07   \n",
       "...                 ...                         ...         ...   \n",
       "780157        [DFR, MN]    Streptomyces rubiginosus        1.90   \n",
       "780158        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780159        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780160        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780161        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "\n",
       "       deposition_date experiment_type  pdb_file_available  \n",
       "0           1994-12-05     diffraction                True  \n",
       "1           1994-12-05     diffraction                True  \n",
       "2           1994-12-14     diffraction                True  \n",
       "3           1994-12-14     diffraction                True  \n",
       "4           1997-12-13     diffraction                True  \n",
       "...                ...             ...                 ...  \n",
       "780157      1990-10-11     diffraction                True  \n",
       "780158      1992-04-03     diffraction                True  \n",
       "780159      1992-04-03     diffraction                True  \n",
       "780160      1992-04-03     diffraction                True  \n",
       "780161      1992-04-03     diffraction                True  \n",
       "\n",
       "[780162 rows x 15 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "manager.df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100d_A</td>\n",
       "      <td>100d</td>\n",
       "      <td>A</td>\n",
       "      <td>10</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...</td>\n",
       "      <td>CCGGCGCCGG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[SPM]</td>\n",
       "      <td></td>\n",
       "      <td>1.90</td>\n",
       "      <td>1994-12-05</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100d_B</td>\n",
       "      <td>100d</td>\n",
       "      <td>B</td>\n",
       "      <td>10</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...</td>\n",
       "      <td>CCGGCGCCGG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[SPM]</td>\n",
       "      <td></td>\n",
       "      <td>1.90</td>\n",
       "      <td>1994-12-05</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>101d_A</td>\n",
       "      <td>101d</td>\n",
       "      <td>A</td>\n",
       "      <td>12</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...</td>\n",
       "      <td>CGCGAATTCGCG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[CBR, MG, NT]</td>\n",
       "      <td></td>\n",
       "      <td>2.25</td>\n",
       "      <td>1994-12-14</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>101d_B</td>\n",
       "      <td>101d</td>\n",
       "      <td>B</td>\n",
       "      <td>12</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...</td>\n",
       "      <td>CGCGAATTCGCG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[CBR, MG, NT]</td>\n",
       "      <td></td>\n",
       "      <td>2.25</td>\n",
       "      <td>1994-12-14</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>101m_A</td>\n",
       "      <td>101m</td>\n",
       "      <td>A</td>\n",
       "      <td>154</td>\n",
       "      <td>protein</td>\n",
       "      <td>MYOGLOBIN</td>\n",
       "      <td>MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[HEM, NBN, SO4]</td>\n",
       "      <td>Physeter catodon</td>\n",
       "      <td>2.07</td>\n",
       "      <td>1997-12-13</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780157</th>\n",
       "      <td>9xia_A</td>\n",
       "      <td>9xia</td>\n",
       "      <td>A</td>\n",
       "      <td>388</td>\n",
       "      <td>protein</td>\n",
       "      <td>XYLOSE ISOMERASE</td>\n",
       "      <td>MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[DFR, MN]</td>\n",
       "      <td>Streptomyces rubiginosus</td>\n",
       "      <td>1.90</td>\n",
       "      <td>1990-10-11</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780158</th>\n",
       "      <td>9xim_A</td>\n",
       "      <td>9xim</td>\n",
       "      <td>A</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780159</th>\n",
       "      <td>9xim_B</td>\n",
       "      <td>9xim</td>\n",
       "      <td>B</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780160</th>\n",
       "      <td>9xim_C</td>\n",
       "      <td>9xim</td>\n",
       "      <td>C</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780161</th>\n",
       "      <td>9xim_D</td>\n",
       "      <td>9xim</td>\n",
       "      <td>D</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>780162 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   pdb chain  length molecule_type  \\\n",
       "0       100d_A  100d     A      10            na   \n",
       "1       100d_B  100d     B      10            na   \n",
       "2       101d_A  101d     A      12            na   \n",
       "3       101d_B  101d     B      12            na   \n",
       "4       101m_A  101m     A     154       protein   \n",
       "...        ...   ...   ...     ...           ...   \n",
       "780157  9xia_A  9xia     A     388       protein   \n",
       "780158  9xim_A  9xim     A     393       protein   \n",
       "780159  9xim_B  9xim     B     393       protein   \n",
       "780160  9xim_C  9xim     C     393       protein   \n",
       "780161  9xim_D  9xim     D     393       protein   \n",
       "\n",
       "                                                     name  \\\n",
       "0       DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...   \n",
       "1       DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...   \n",
       "2       DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...   \n",
       "3       DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...   \n",
       "4                                               MYOGLOBIN   \n",
       "...                                                   ...   \n",
       "780157                                   XYLOSE ISOMERASE   \n",
       "780158                                 D-XYLOSE ISOMERASE   \n",
       "780159                                 D-XYLOSE ISOMERASE   \n",
       "780160                                 D-XYLOSE ISOMERASE   \n",
       "780161                                 D-XYLOSE ISOMERASE   \n",
       "\n",
       "                                                 sequence split  n_chains  \\\n",
       "0                                              CCGGCGCCGG   N/A         2   \n",
       "1                                              CCGGCGCCGG   N/A         2   \n",
       "2                                            CGCGAATTCGCG   N/A         2   \n",
       "3                                            CGCGAATTCGCG   N/A         2   \n",
       "4       MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR...   N/A         1   \n",
       "...                                                   ...   ...       ...   \n",
       "780157  MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL...   N/A         1   \n",
       "780158  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780159  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780160  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780161  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "\n",
       "                ligands                      source  resolution  \\\n",
       "0                 [SPM]                                    1.90   \n",
       "1                 [SPM]                                    1.90   \n",
       "2         [CBR, MG, NT]                                    2.25   \n",
       "3         [CBR, MG, NT]                                    2.25   \n",
       "4       [HEM, NBN, SO4]            Physeter catodon        2.07   \n",
       "...                 ...                         ...         ...   \n",
       "780157        [DFR, MN]    Streptomyces rubiginosus        1.90   \n",
       "780158        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780159        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780160        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780161        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "\n",
       "       deposition_date experiment_type  pdb_file_available  \n",
       "0           1994-12-05     diffraction                True  \n",
       "1           1994-12-05     diffraction                True  \n",
       "2           1994-12-14     diffraction                True  \n",
       "3           1994-12-14     diffraction                True  \n",
       "4           1997-12-13     diffraction                True  \n",
       "...                ...             ...                 ...  \n",
       "780157      1990-10-11     diffraction                True  \n",
       "780158      1992-04-03     diffraction                True  \n",
       "780159      1992-04-03     diffraction                True  \n",
       "780160      1992-04-03     diffraction                True  \n",
       "780161      1992-04-03     diffraction                True  \n",
       "\n",
       "[780162 rows x 15 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "manager.source"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Selection Properties"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num chains:  780162\n",
      "Num unique pdbs:  202738\n",
      "Longest chain:  16181\n",
      "Shortest chain:  1\n",
      "Best Resolution:  -1.0\n",
      "Worst Resolution:  70.0\n",
      "Experiment Types:  ['diffraction' 'NMR' 'other' 'EM']\n",
      "Molecule Types:  ['na' 'protein']\n"
     ]
    }
   ],
   "source": [
    "print(\"Num chains: \", manager.get_num_chains())\n",
    "print(\"Num unique pdbs: \", manager.get_num_unique_pdbs())\n",
    "print(\"Longest chain: \", manager.get_longest_chain())\n",
    "print(\"Shortest chain: \", manager.get_shortest_chain())\n",
    "print(\"Best Resolution: \", manager.get_best_resolution())\n",
    "print(\"Worst Resolution: \", manager.get_worst_resolution())\n",
    "print(\"Experiment Types: \", manager.get_experiment_types())\n",
    "print(\"Molecule Types: \", manager.get_molecule_types())"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Making Selections\n",
    "\n",
    "Selection functions return a pd.DataFrame. All selection functions provide an `update: bool` argument controlling whether or not `manager.df` is updated in place:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of chains:  780162\n",
      "Hi res structure:  202476\n",
      "Not modified: 780162\n",
      "Modified inplace: 202476\n"
     ]
    }
   ],
   "source": [
    "print(\"Number of chains: \", len(manager.df))\n",
    "\n",
    "print(\"Hi res structure: \", len(manager.resolution_better_than_or_equal_to(2.0)))\n",
    "print(f\"Not modified: {len(manager.df)}\")\n",
    "\n",
    "# Update inplace\n",
    "manager.resolution_better_than_or_equal_to(2.0, update=True)\n",
    "print(f\"Modified inplace: {len(manager.df)}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want to reset the selection:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "780162\n"
     ]
    }
   ],
   "source": [
    "manager.reset()\n",
    "print(len(manager.df))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here is an example selection:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>19383</th>\n",
       "      <td>1hhz_D</td>\n",
       "      <td>1hhz</td>\n",
       "      <td>D</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>CELL WALL PEPTIDE</td>\n",
       "      <td>AEKAA</td>\n",
       "      <td>N/A</td>\n",
       "      <td>6</td>\n",
       "      <td>[3FG, DAL, DVC, FGA, GHP, MLU, OMY, OMZ, PGR]</td>\n",
       "      <td></td>\n",
       "      <td>0.99</td>\n",
       "      <td>2000-12-29</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19384</th>\n",
       "      <td>1hhz_E</td>\n",
       "      <td>1hhz</td>\n",
       "      <td>E</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>CELL WALL PEPTIDE</td>\n",
       "      <td>AEKAA</td>\n",
       "      <td>N/A</td>\n",
       "      <td>6</td>\n",
       "      <td>[3FG, DAL, DVC, FGA, GHP, MLU, OMY, OMZ, PGR]</td>\n",
       "      <td></td>\n",
       "      <td>0.99</td>\n",
       "      <td>2000-12-29</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19385</th>\n",
       "      <td>1hhz_F</td>\n",
       "      <td>1hhz</td>\n",
       "      <td>F</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>CELL WALL PEPTIDE</td>\n",
       "      <td>AEKAA</td>\n",
       "      <td>N/A</td>\n",
       "      <td>6</td>\n",
       "      <td>[3FG, DAL, DVC, FGA, GHP, MLU, OMY, OMZ, PGR]</td>\n",
       "      <td></td>\n",
       "      <td>0.99</td>\n",
       "      <td>2000-12-29</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50598</th>\n",
       "      <td>1sha_B</td>\n",
       "      <td>1sha</td>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>PHOSPHOPEPTIDE A</td>\n",
       "      <td>YVPML</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[PTR]</td>\n",
       "      <td>Rous sarcoma virus</td>\n",
       "      <td>1.50</td>\n",
       "      <td>1992-08-18</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50810</th>\n",
       "      <td>1skg_B</td>\n",
       "      <td>1skg</td>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>VAFRS</td>\n",
       "      <td>VAFRS</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[MOH, SO4]</td>\n",
       "      <td>Daboia russellii pulchella; SYNTHETIC CONSTRUCT</td>\n",
       "      <td>1.21</td>\n",
       "      <td>2004-03-04</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>653289</th>\n",
       "      <td>7kpu_B</td>\n",
       "      <td>7kpu</td>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>bisubstrate analogue (CMC-ACE-SER-GLY-ARG-GLY-...</td>\n",
       "      <td>SGRGK</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[ACE, BTB, GOL, NH2, SO4, WZG]</td>\n",
       "      <td>Homo sapiens; SYNTHETIC CONSTRUCT</td>\n",
       "      <td>1.43</td>\n",
       "      <td>2020-11-12</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>682420</th>\n",
       "      <td>7oju_H</td>\n",
       "      <td>7oju</td>\n",
       "      <td>H</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>MVNAL Peptide</td>\n",
       "      <td>MVNAL</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[CMC, GOL, P6G]</td>\n",
       "      <td>Chaetomium thermophilum (strain DSM 1495 / CBS...</td>\n",
       "      <td>1.10</td>\n",
       "      <td>2021-05-17</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>694077</th>\n",
       "      <td>7pul_P</td>\n",
       "      <td>7pul</td>\n",
       "      <td>P</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>GLY-ALA-GLY-ALA-ALA</td>\n",
       "      <td>GAGAA</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[CA, MG]</td>\n",
       "      <td>Enterococcus faecalis</td>\n",
       "      <td>1.40</td>\n",
       "      <td>2021-09-30</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>745627</th>\n",
       "      <td>7x70_B</td>\n",
       "      <td>7x70</td>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>peptide</td>\n",
       "      <td>AVKLQ</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[]</td>\n",
       "      <td>Homo sapiens; SYNTHETIC CONSTRUCT</td>\n",
       "      <td>1.25</td>\n",
       "      <td>2022-03-08</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>755416</th>\n",
       "      <td>7z5z_C</td>\n",
       "      <td>7z5z</td>\n",
       "      <td>C</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>UDP-MurNAc-pentapeptide</td>\n",
       "      <td>AECAA</td>\n",
       "      <td>N/A</td>\n",
       "      <td>3</td>\n",
       "      <td>[A9Z, DAL, FGA, GOL, MUB, UDP]</td>\n",
       "      <td>Weissella viridescens; SYNTHETIC CONSTRUCT</td>\n",
       "      <td>1.49</td>\n",
       "      <td>2022-03-10</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>80 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   pdb chain  length molecule_type  \\\n",
       "19383   1hhz_D  1hhz     D       5       protein   \n",
       "19384   1hhz_E  1hhz     E       5       protein   \n",
       "19385   1hhz_F  1hhz     F       5       protein   \n",
       "50598   1sha_B  1sha     B       5       protein   \n",
       "50810   1skg_B  1skg     B       5       protein   \n",
       "...        ...   ...   ...     ...           ...   \n",
       "653289  7kpu_B  7kpu     B       5       protein   \n",
       "682420  7oju_H  7oju     H       5       protein   \n",
       "694077  7pul_P  7pul     P       5       protein   \n",
       "745627  7x70_B  7x70     B       5       protein   \n",
       "755416  7z5z_C  7z5z     C       5       protein   \n",
       "\n",
       "                                                     name sequence split  \\\n",
       "19383                                   CELL WALL PEPTIDE    AEKAA   N/A   \n",
       "19384                                   CELL WALL PEPTIDE    AEKAA   N/A   \n",
       "19385                                   CELL WALL PEPTIDE    AEKAA   N/A   \n",
       "50598                                    PHOSPHOPEPTIDE A    YVPML   N/A   \n",
       "50810                                               VAFRS    VAFRS   N/A   \n",
       "...                                                   ...      ...   ...   \n",
       "653289  bisubstrate analogue (CMC-ACE-SER-GLY-ARG-GLY-...    SGRGK   N/A   \n",
       "682420                                      MVNAL Peptide    MVNAL   N/A   \n",
       "694077                                GLY-ALA-GLY-ALA-ALA    GAGAA   N/A   \n",
       "745627                                            peptide    AVKLQ   N/A   \n",
       "755416                            UDP-MurNAc-pentapeptide    AECAA   N/A   \n",
       "\n",
       "        n_chains                                        ligands  \\\n",
       "19383          6  [3FG, DAL, DVC, FGA, GHP, MLU, OMY, OMZ, PGR]   \n",
       "19384          6  [3FG, DAL, DVC, FGA, GHP, MLU, OMY, OMZ, PGR]   \n",
       "19385          6  [3FG, DAL, DVC, FGA, GHP, MLU, OMY, OMZ, PGR]   \n",
       "50598          2                                          [PTR]   \n",
       "50810          2                                     [MOH, SO4]   \n",
       "...          ...                                            ...   \n",
       "653289         4                 [ACE, BTB, GOL, NH2, SO4, WZG]   \n",
       "682420         2                                [CMC, GOL, P6G]   \n",
       "694077         2                                       [CA, MG]   \n",
       "745627         2                                             []   \n",
       "755416         3                 [A9Z, DAL, FGA, GOL, MUB, UDP]   \n",
       "\n",
       "                                                   source  resolution  \\\n",
       "19383                                                            0.99   \n",
       "19384                                                            0.99   \n",
       "19385                                                            0.99   \n",
       "50598                                  Rous sarcoma virus        1.50   \n",
       "50810     Daboia russellii pulchella; SYNTHETIC CONSTRUCT        1.21   \n",
       "...                                                   ...         ...   \n",
       "653289                  Homo sapiens; SYNTHETIC CONSTRUCT        1.43   \n",
       "682420  Chaetomium thermophilum (strain DSM 1495 / CBS...        1.10   \n",
       "694077                              Enterococcus faecalis        1.40   \n",
       "745627                  Homo sapiens; SYNTHETIC CONSTRUCT        1.25   \n",
       "755416         Weissella viridescens; SYNTHETIC CONSTRUCT        1.49   \n",
       "\n",
       "       deposition_date experiment_type  pdb_file_available  \n",
       "19383       2000-12-29     diffraction                True  \n",
       "19384       2000-12-29     diffraction                True  \n",
       "19385       2000-12-29     diffraction                True  \n",
       "50598       1992-08-18     diffraction                True  \n",
       "50810       2004-03-04     diffraction                True  \n",
       "...                ...             ...                 ...  \n",
       "653289      2020-11-12     diffraction                True  \n",
       "682420      2021-05-17     diffraction                True  \n",
       "694077      2021-09-30     diffraction                True  \n",
       "745627      2022-03-08     diffraction                True  \n",
       "755416      2022-03-10     diffraction                True  \n",
       "\n",
       "[80 rows x 15 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "manager.length_shorter_than(6, update=True)\n",
    "manager.length_longer_than(4, update=True)\n",
    "manager.molecule_type(\"protein\", update=True)\n",
    "manager.resolution_better_than_or_equal_to(1.5, update=True)\n",
    "manager.experiment_type(\"diffraction\", update=True)\n",
    "manager.remove_non_standard_alphabet_sequences(update=True)\n",
    "manager.df"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Creating train/val/test splits\n",
    "\n",
    "Splits can be created with through a few different strategies:\n",
    "\n",
    "* temporal\n",
    "* sequence similarity (using MMSeqs2)\n",
    "* structural similarity (coming soon)\n",
    "\n",
    "\n",
    "Multiple types of splitting operations can be composed. For example, one can perform a time-based split after clustering sequences into train/val/test splits without corrupting the sequence-based splits.\n",
    "\n",
    "### Temporal splits\n",
    "\n",
    "We can create train, val and test splits based on deposition time:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/30/23 20:27:05] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Found <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3749</span> PDB files unavailable for download.                         <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#527\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">527</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/30/23 20:27:05]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Found \u001b[1;36m3749\u001b[0m PDB files unavailable for download.                         \u001b]8;id=146166;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=955387;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#527\u001b\\\u001b[2m527\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'1bos': False, '1vvj': False, '1vy4': False, '1vy5': False, '1vy6': False, '1vy7': False, '2btj': False, '2vvj': False, '3j3q': False, '3j3y': False, '3j6b': False, '3j6x': False, '3j6y': False, '3j77': False, '3j78': False, '3j79': False, '3j7o': False, '3j7p': False, '3j7q': False, '3j7r': False, '3j8h': False, '3j92': False, '3j9g': False, '3j9k': False, '3j9l': False, '3j9m': False, '3j9r': False, '3j9w': False, '3j9y': False, '3j9z': False, '3ja1': False, '3jag': False, '3jah': False, '3jai': False, '3jaj': False, '3jan': False, '3jbn': False, '3jbo': False, '3jbp': False, '3jbu': False, '3jbv': False, '3jc1': False, '3jc8': False, '3jc9': False, '3jcd': False, '3jce': False, '3jcj': False, '3jcn': False, '3jco': False, '3jcp': False, '3jcs': False, '3jct': False, '3k1q': False, '3whe': False, '4abz': False, '4bp7': False, '4bts': False, '4ctf': False, '4ctg': False, '4d5y': False, '4d67': False, '4dcb': False, '4fqr': False, '4frt': False, '4l47': False, '4l71': False, '4lel': False, '4lfz': False, '4lnt': False, '4lsk': False, '4lt8': False, '4nwr': False, '4o9y': False, '4p6f': False, '4p70': False, '4qyk': False, '4tua': False, '4tub': False, '4tuc': False, '4tud': False, '4tue': False, '4tvx': False, '4u1u': False, '4u1v': False, '4u20': False, '4u24': False, '4u25': False, '4u26': False, '4u27': False, '4u3m': False, '4u3n': False, '4u3u': False, '4u4n': False, '4u4o': False, '4u4q': False, '4u4r': False, '4u4u': False, '4u4y': False, '4u4z': False, '4u50': False, '4u51': False, '4u52': False, '4u53': False, '4u55': False, '4u56': False, '4u6f': False, '4udf': False, '4ue3': False, '4ug0': False, '4ujc': False, '4ujd': False, '4uje': False, '4v3p': False, '4v40': False, '4v41': False, '4v42': False, '4v43': False, '4v44': False, '4v45': False, '4v46': False, '4v47': False, '4v48': False, '4v49': False, '4v4a': False, '4v4b': False, '4v4c': False, '4v4d': False, '4v4e': False, '4v4f': False, '4v4g': False, '4v4h': False, '4v4i': False, '4v4j': 
False, '4v4k': False, '4v4l': False, '4v4m': False, '4v4n': False, '4v4o': False, '4v4p': False, '4v4q': False, '4v4r': False, '4v4s': False, '4v4t': False, '4v4u': False, '4v4v': False, '4v4w': False, '4v4x': False, '4v4y': False, '4v4z': False, '4v50': False, '4v51': False, '4v52': False, '4v53': False, '4v54': False, '4v55': False, '4v56': False, '4v57': False, '4v58': False, '4v59': False, '4v5a': False, '4v5b': False, '4v5c': False, '4v5d': False, '4v5e': False, '4v5f': False, '4v5g': False, '4v5h': False, '4v5i': False, '4v5j': False, '4v5k': False, '4v5l': False, '4v5m': False, '4v5n': False, '4v5o': False, '4v5p': False, '4v5q': False, '4v5r': False, '4v5s': False, '4v5t': False, '4v5v': False, '4v5w': False, '4v5x': False, '4v5y': False, '4v5z': False, '4v60': False, '4v61': False, '4v62': False, '4v63': False, '4v64': False, '4v65': False, '4v66': False, '4v67': False, '4v68': False, '4v69': False, '4v6a': False, '4v6b': False, '4v6c': False, '4v6d': False, '4v6e': False, '4v6f': False, '4v6g': False, '4v6h': False, '4v6i': False, '4v6k': False, '4v6l': False, '4v6m': False, '4v6n': False, '4v6o': False, '4v6p': False, '4v6q': False, '4v6r': False, '4v6s': False, '4v6t': False, '4v6u': False, '4v6v': False, '4v6w': False, '4v6x': False, '4v6y': False, '4v6z': False, '4v70': False, '4v71': False, '4v72': False, '4v73': False, '4v74': False, '4v75': False, '4v76': False, '4v77': False, '4v78': False, '4v79': False, '4v7a': False, '4v7b': False, '4v7c': False, '4v7d': False, '4v7e': False, '4v7g': False, '4v7h': False, '4v7i': False, '4v7j': False, '4v7k': False, '4v7l': False, '4v7m': False, '4v7n': False, '4v7o': False, '4v7p': False, '4v7q': False, '4v7r': False, '4v7s': False, '4v7t': False, '4v7u': False, '4v7v': False, '4v7w': False, '4v7x': False, '4v7y': False, '4v7z': False, '4v81': False, '4v82': False, '4v83': False, '4v84': False, '4v85': False, '4v86': False, '4v87': False, '4v88': False, '4v89': False, '4v8a': False, '4v8b': False, '4v8c': 
False, '4v8d': False, '4v8e': False, '4v8f': False, '4v8g': False, '4v8h': False, '4v8i': False, '4v8j': False, '4v8k': False, '4v8l': False, '4v8m': False, '4v8n': False, '4v8o': False, '4v8p': False, '4v8q': False, '4v8r': False, '4v8s': False, '4v8t': False, '4v8u': False, '4v8v': False, '4v8w': False, '4v8x': False, '4v8y': False, '4v8z': False, '4v90': False, '4v91': False, '4v92': False, '4v93': False, '4v94': False, '4v95': False, '4v96': False, '4v97': False, '4v98': False, '4v99': False, '4v9a': False, '4v9b': False, '4v9c': False, '4v9d': False, '4v9e': False, '4v9f': False, '4v9g': False, '4v9h': False, '4v9i': False, '4v9j': False, '4v9k': False, '4v9l': False, '4v9m': False, '4v9n': False, '4v9o': False, '4v9p': False, '4v9q': False, '4v9r': False, '4v9s': False, '4w29': False, '4w2e': False, '4w2f': False, '4w2g': False, '4w2h': False, '4w2i': False, '4w4g': False, '4wf1': False, '4wiz': False, '4woi': False, '4wpo': False, '4wq1': False, '4wqf': False, '4wqr': False, '4wqu': False, '4wqy': False, '4wr6': False, '4wra': False, '4wro': False, '4wsd': False, '4wsm': False, '4wsn': False, '4wt1': False, '4wt8': False, '4wu1': False, '4www': False, '4wz7': False, '4wzd': False, '4wzj': False, '4wzo': False, '4xej': False, '4y4o': False, '4y4p': False, '4ybb': False, '4yd9': False, '4ym7': False, '4ypb': False, '4yuu': False, '4yzv': False, '4z3s': False, '4z8c': False, '4zer': False, '4zsn': False, '5a9z': False, '5aa0': False, '5aco': False, '5afi': False, '5aj0': False, '5aj4': False, '5apn': False, '5apo': False, '5b5m': False, '5b5n': False, '5bkl': False, '5bkn': False, '5bkq': False, '5bp4': False, '5c1a': False, '5cod': False, '5czp': False, '5d8b': False, '5dat': False, '5dc3': False, '5dfe': False, '5dge': False, '5dgf': False, '5dgv': False, '5dox': False, '5doy': False, '5e7k': False, '5e81': False, '5el4': False, '5el5': False, '5el6': False, '5el7': False, '5epi': False, '5euj': False, '5exc': False, '5f8k': False, '5fci': False, '5fcj': 
False, '5fdu': False, '5fdv': False, '5fki': False, '5fuu': False, '5gak': False, '5gjr': False, '5gky': False, '5gkz': False, '5gl0': False, '5gl1': False, '5gm6': False, '5go9': False, '5goa': False, '5gpn': False, '5gup': False, '5h4p': False, '5h5u': False, '5hau': False, '5hcp': False, '5hcq': False, '5hcr': False, '5hd1': False, '5i4l': False, '5ib7': False, '5ib8': False, '5ibb': False, '5imq': False, '5imr': False, '5ipi': False, '5ipk': False, '5iqr': False, '5it7': False, '5it8': False, '5iv5': False, '5iv7': False, '5j30': False, '5j3c': False, '5j4b': False, '5j4c': False, '5j4d': False, '5j4z': False, '5j5b': False, '5j7l': False, '5j7v': False, '5j7y': False, '5j88': False, '5j8a': False, '5j8b': False, '5j8k': False, '5j91': False, '5jc9': False, '5jcs': False, '5jte': False, '5ju8': False, '5jul': False, '5juo': False, '5jup': False, '5jus': False, '5jut': False, '5juu': False, '5kcr': False, '5kcs': False, '5kps': False, '5kpv': False, '5kpw': False, '5kpx': False, '5l1d': False, '5l3p': False, '5l4g': False, '5leg': False, '5ler': False, '5lfb': False, '5li0': False, '5lks': False, '5lqp': False, '5lyb': False, '5lza': False, '5lzb': False, '5lzc': False, '5lzd': False, '5lze': False, '5lzf': False, '5lzs': False, '5lzt': False, '5lzu': False, '5lzv': False, '5lzw': False, '5lzx': False, '5lzy': False, '5lzz': False, '5m1j': False, '5mc6': False, '5mdv': False, '5mdw': False, '5mdy': False, '5mdz': False, '5mei': False, '5mgp': False, '5mkl': False, '5mmm': False, '5mpb': False, '5mpc': False, '5mq3': False, '5mq7': False, '5mrc': False, '5mre': False, '5mrf': False, '5mx7': False, '5myj': False, '5nco': False, '5nd8': False, '5nd9': False, '5ndg': False, '5ndj': False, '5ndk': False, '5ndv': False, '5ndw': False, '5ngm': False, '5njt': False, '5nmb': False, '5np6': False, '5nrl': False, '5nwy': False, '5o09': False, '5o2r': False, '5o61': False, '5obm': False, '5ojq': False, '5on6': False, '5oql': False, '5ot7': False, '5szs': False, '5t0c': 
False, '5t15': False, '5t2a': False, '5t2c': False, '5t5h': False, '5t61': False, '5t62': False, '5t6r': False, '5t7v': False, '5t9m': False, '5t9n': False, '5t9r': False, '5t9s': False, '5t9v': False, '5ta3': False, '5tal': False, '5tam': False, '5tan': False, '5tap': False, '5taq': False, '5tas': False, '5tat': False, '5tau': False, '5tav': False, '5taw': False, '5tax': False, '5tay': False, '5taz': False, '5tb0': False, '5tb1': False, '5tb2': False, '5tb3': False, '5tb4': False, '5tbw': False, '5tcr': False, '5tcu': False, '5tga': False, '5tgm': False, '5tx1': False, '5u4i': False, '5u9f': False, '5u9g': False, '5umd': False, '5uot': False, '5uq7': False, '5uq8': False, '5urf': False, '5urw': False, '5urx': False, '5us7': False, '5us9': False, '5uyk': False, '5uyl': False, '5uym': False, '5uyn': False, '5uyp': False, '5uyq': False, '5v74': False, '5v8i': False, '5v93': False, '5vfp': False, '5vfq': False, '5vfs': False, '5vfu': False, '5vku': False, '5vlz': False, '5vp2': False, '5vpo': False, '5vpp': False, '5vyc': False, '5w4k': False, '5wdt': False, '5we4': False, '5we6': False, '5wf0': False, '5wfk': False, '5wfs': False, '5wis': False, '5wit': False, '5wju': False, '5wjv': False, '5wjw': False, '5wjx': False, '5wjy': False, '5wjz': False, '5wk5': False, '5wk6': False, '5wlc': False, '5wp9': False, '5wvi': False, '5wvk': False, '5wyj': False, '5wyk': False, '5x8p': False, '5xjc': False, '5xth': False, '5xti': False, '5xxb': False, '5xy3': False, '5y6p': False, '5ydt': False, '5yzg': False, '5z56': False, '5z57': False, '5zap': False, '5zeb': False, '5zep': False, '5zf0': False, '5zlu': False, '5zwm': False, '5zz8': False, '6ahd': False, '6awb': False, '6awc': False, '6awd': False, '6az3': False, '6b1t': False, '6b43': False, '6b4v': False, '6b9q': False, '6bcu': False, '6bcx': False, '6boh': False, '6bok': False, '6bu8': False, '6buw': False, '6bwx': False, '6bx0': False, '6bx1': False, '6by1': False, '6by7': False, '6bz6': False, '6bz7': False, '6bz8': 
False, '6c4i': False, '6c50': False, '6c5l': False, '6cae': False, '6cbe': False, '6cde': False, '6cdi': False, '6cfj': False, '6cfk': False, '6cfl': False, '6cgr': False, '6cue': False, '6cuf': False, '6czr': False, '6d90': False, '6d9j': False, '6dhe': False, '6dhf': False, '6dhg': False, '6dhh': False, '6dho': False, '6dhp': False, '6dnc': False, '6dqj': False, '6dqn': False, '6dqs': False, '6dqv': False, '6dqz': False, '6dr0': False, '6dr2': False, '6dra': False, '6drc': False, '6dwu': False, '6dzi': False, '6dzu': False, '6e2r': False, '6e2x': False, '6e2z': False, '6e30': False, '6e32': False, '6e34': False, '6e39': False, '6e8g': False, '6e9d': False, '6ek5': False, '6ekc': False, '6elz': False, '6em1': False, '6enf': False, '6enj': False, '6enu': False, '6eri': False, '6fec': False, '6fg3': False, '6fkr': False, '6foo': False, '6frk': False, '6fsz': False, '6ft6': False, '6ftg': False, '6fti': False, '6ftj': False, '6fvt': False, '6fvu': False, '6fvv': False, '6fvw': False, '6fvx': False, '6fvy': False, '6fxc': False, '6fyx': False, '6fyy': False, '6g2h': False, '6g2i': False, '6g5k': False, '6g8h': False, '6gaw': False, '6gaz': False, '6gb2': False, '6gjc': False, '6gq1': False, '6gqb': False, '6gqv': False, '6gsj': False, '6gsk': False, '6gsl': False, '6gsm': False, '6gsn': False, '6gsr': False, '6gua': False, '6gwt': False, '6gxm': False, '6gxn': False, '6gxo': False, '6gxp': False, '6gz3': False, '6gz4': False, '6gz5': False, '6gzq': False, '6gzx': False, '6gzz': False, '6h03': False, '6h4n': False, '6h58': False, '6h5i': False, '6h6e': False, '6h6f': False, '6h8k': False, '6ha1': False, '6ha8': False, '6hcf': False, '6hcj': False, '6hcm': False, '6hcq': False, '6hcr': False, '6hd7': False, '6hhq': False, '6hht': False, '6hif': False, '6hiv': False, '6hiw': False, '6hix': False, '6hiy': False, '6hiz': False, '6hrm': False, '6ht7': False, '6htq': False, '6hxx': False, '6i46': False, '6i47': False, '6i48': False, '6i49': False, '6i4a': False, '6i7o': 
False, '6i7v': False, '6i9r': False, '6ia9': False, '6igc': False, '6ip5': False, '6ip6': False, '6ip8': False, '6j0n': False, '6j2c': False, '6j2n': False, '6j2q': False, '6j2x': False, '6j30': False, '6j3y': False, '6j3z': False, '6j40': False, '6j5k': False, '6jeo': False, '6jg3': False, '6jgz': False, '6jh6': False, '6jhn': False, '6ji0': False, '6ji8': False, '6jii': False, '6jiu': False, '6jiy': False, '6jlu': False, '6jrr': False, '6jrs': False, '6jv2': False, '6k33': False, '6k3i': False, '6kad': False, '6kaf': False, '6kco': False, '6ke6': False, '6kel': False, '6kem': False, '6ken': False, '6keo': False, '6kep': False, '6keq': False, '6ker': False, '6kgx': False, '6kif': False, '6kig': False, '6kmw': False, '6kmx': False, '6l4t': False, '6l4u': False, '6l9t': False, '6lgl': False, '6lgn': False, '6lkq': False, '6lqm': False, '6lqp': False, '6lqq': False, '6lqr': False, '6lqs': False, '6lqt': False, '6lqu': False, '6lqv': False, '6lsr': False, '6lss': False, '6lsy': False, '6lu8': False, '6m2w': False, '6m62': False, '6m8p': False, '6m99': False, '6mpg': False, '6mph': False, '6msb': False, '6msd': False, '6mse': False, '6msg': False, '6msh': False, '6msj': False, '6msk': False, '6mtb': False, '6mtc': False, '6mtd': False, '6mte': False, '6mzu': False, '6mzv': False, '6mzx': False, '6mzy': False, '6n06': False, '6n07': False, '6n09': False, '6n0f': False, '6n0g': False, '6n1d': False, '6n1v': False, '6n1w': False, '6n2d': False, '6n2y': False, '6n2z': False, '6n30': False, '6n4v': False, '6n8j': False, '6n8k': False, '6n8l': False, '6n8m': False, '6n8n': False, '6n8o': False, '6n9e': False, '6n9f': False, '6nc3': False, '6ncl': False, '6nd5': False, '6nd6': False, '6ndk': False, '6nf2': False, '6nhj': False, '6nm5': False, '6nsh': False, '6nta': False, '6nu2': False, '6nu3': False, '6nuo': False, '6nwa': False, '6nwy': False, '6nxe': False, '6o2s': False, '6o2t': False, '6o3m': False, '6o8w': False, '6o8x': False, '6o8y': False, '6o8z': False, '6o90': 
False, '6o97': False, '6o9j': False, '6o9k': False, '6o9r': False, '6of1': False, '6of6': False, '6ofx': False, '6og7': False, '6ogf': False, '6ogg': False, '6ogi': False, '6oif': False, '6oig': False, '6oj2': False, '6oj3': False, '6oj4': False, '6oj5': False, '6oj6': False, '6ola': False, '6ole': False, '6olf': False, '6olg': False, '6oli': False, '6olz': False, '6om0': False, '6om6': False, '6om7': False, '6ope': False, '6ord': False, '6ore': False, '6orl': False, '6orv': False, '6osi': False, '6osk': False, '6osq': False, '6ost': False, '6osy': False, '6ot1': False, '6ot3': False, '6otr': False, '6ouo': False, '6owf': False, '6owg': False, '6oxa': False, '6oxi': False, '6p5i': False, '6p5j': False, '6p5k': False, '6p5n': False, '6pem': False, '6pep': False, '6pj6': False, '6pto': False, '6pv6': False, '6pwb': False, '6q14': False, '6q15': False, '6q16': False, '6q1f': False, '6q3g': False, '6q7l': False, '6q7m': False, '6q8y': False, '6q95': False, '6q97': False, '6q98': False, '6q9a': False, '6q9b': False, '6q9d': False, '6q9e': False, '6qa9': False, '6qbx': False, '6qc2': False, '6qc3': False, '6qc4': False, '6qc5': False, '6qc6': False, '6qc7': False, '6qc8': False, '6qc9': False, '6qca': False, '6qcf': False, '6qcm': False, '6qdv': False, '6qi5': False, '6qik': False, '6ql5': False, '6ql6': False, '6ql7': False, '6ql9': False, '6qn1': False, '6qnq': False, '6qnr': False, '6qsw': False, '6qsx': False, '6qt0': False, '6qtz': False, '6qvk': False, '6qw6': False, '6qx7': False, '6qx9': False, '6qyd': False, '6qyj': False, '6qym': False, '6qz0': False, '6qz9': False, '6qza': False, '6qzc': False, '6qzd': False, '6qzf': False, '6qzp': False, '6r0e': False, '6r21': False, '6r5q': False, '6r6g': False, '6r6p': False, '6r7q': False, '6r83': False, '6r84': False, '6r86': False, '6r87': False, '6rav': False, '6ri5': False, '6rlw': False, '6rm3': False, '6rqc': False, '6rvv': False, '6rvw': False, '6rw4': False, '6rw5': False, '6rxt': False, '6rxu': False, '6rxv': 
False, '6rxx': False, '6rxy': False, '6rxz': False, '6rzz': False, '6s05': False, '6s0x': False, '6s0z': False, '6s12': False, '6s13': False, '6s2x': False, '6s47': False, '6s5v': False, '6sg2': False, '6sg9': False, '6sga': False, '6sgb': False, '6sgc': False, '6sgg': False, '6sgl': False, '6sgm': False, '6sgn': False, '6sh1': False, '6sh2': False, '6sjt': False, '6skf': False, '6skg': False, '6skl': False, '6skw': False, '6sl9': False, '6sla': False, '6slb': False, '6slc': False, '6slh': False, '6snt': False, '6spf': False, '6spg': False, '6sqq': False, '6sqt': False, '6sqv': False, '6sr7': False, '6srv': False, '6srx': False, '6ss0': False, '6ss2': False, '6ss4': False, '6ss5': False, '6ss6': False, '6sue': False, '6suf': False, '6suj': False, '6suv': False, '6sv4': False, '6swa': False, '6swn': False, '6swo': False, '6swp': False, '6swq': False, '6swv': False, '6swz': False, '6sx4': False, '6sxo': False, '6sxs': False, '6sxu': False, '6syo': False, '6syp': False, '6syx': False, '6szd': False, '6szk': False, '6szs': False, '6t14': False, '6t17': False, '6t19': False, '6t1a': False, '6t1c': False, '6t28': False, '6t29': False, '6t2n': False, '6t2o': False, '6t2p': False, '6t2q': False, '6t2r': False, '6t2s': False, '6t42': False, '6t44': False, '6t4q': False, '6t4r': False, '6t59': False, '6t5o': False, '6t6g': False, '6t6q': False, '6t75': False, '6t7g': False, '6t7i': False, '6t7n': False, '6t7t': False, '6t83': False, '6t8s': False, '6t8u': False, '6t8v': False, '6t8w': False, '6t8y': False, '6t8z': False, '6t92': False, '6t94': False, '6t9d': False, '6t9e': False, '6t9m': False, '6t9r': False, '6ta1': False, '6tah': False, '6tai': False, '6taj': False, '6tak': False, '6tb3': False, '6tb9': False, '6tba': False, '6tbv': False, '6tby': False, '6tc3': False, '6tc5': False, '6tc7': False, '6tcl': False, '6tcx': False, '6tcy': False, '6tdu': False, '6tee': False, '6tef': False, '6teg': False, '6tf9': False, '6tg4': False, '6tg8': False, '6th6': False, '6thy': 
False, '6tib': False, '6tic': False, '6tid': False, '6tie': False, '6tig': False, '6tih': False, '6tii': False, '6tij': False, '6til': False, '6tin': False, '6tio': False, '6tix': False, '6tjj': False, '6tjk': False, '6tjq': False, '6tjs': False, '6tju': False, '6tk8': False, '6tka': False, '6tkb': False, '6tkc': False, '6tkd': False, '6tke': False, '6tkf': False, '6tkm': False, '6tkn': False, '6tkp': False, '6tkq': False, '6tkr': False, '6tks': False, '6tkz': False, '6tlu': False, '6tm4': False, '6tmg': False, '6tmj': False, '6tmk': False, '6tml': False, '6tmm': False, '6tmp': False, '6tmq': False, '6tmz': False, '6tn0': False, '6tn1': False, '6tn2': False, '6tn4': False, '6tn5': False, '6tnl': False, '6tnu': False, '6to3': False, '6to5': False, '6to9': False, '6toc': False, '6toq': False, '6tpx': False, '6tpy': False, '6tpz': False, '6tq1': False, '6tq2': False, '6tr9': False, '6trg': False, '6trv': False, '6trz': False, '6ts0': False, '6ts1': False, '6tsa': False, '6tsb': False, '6tsd': False, '6tsf': False, '6tsj': False, '6tsl': False, '6tsm': False, '6tsn': False, '6tso': False, '6tsp': False, '6tsq': False, '6tsr': False, '6tss': False, '6tsu': False, '6tsx': False, '6tt5': False, '6tt9': False, '6tu5': False, '6tu7': False, '6tud': False, '6tui': False, '6tul': False, '6tur': False, '6tvi': False, '6tvk': False, '6tvq': False, '6tvu': False, '6tvw': False, '6tw5': False, '6tw6': False, '6tw7': False, '6tw8': False, '6two': False, '6twp': False, '6twz': False, '6txq': False, '6txs': False, '6tz4': False, '6tz5': False, '6tz9': False, '6u0r': False, '6u0v': False, '6u2l': False, '6u3q': False, '6u42': False, '6u48': False, '6u5b': False, '6u5f': False, '6u5k': False, '6ucq': False, '6udj': False, '6ue0': False, '6uo1': False, '6utv': False, '6utw': False, '6utx': False, '6uty': False, '6utz': False, '6uu0': False, '6uu1': False, '6uu2': False, '6uu3': False, '6uu4': False, '6uu5': False, '6uu6': False, '6uu7': False, '6uu8': False, '6uu9': False, '6uua': 
False, '6uub': False, '6uuc': False, '6uwi': False, '6uz7': False, '6uzc': False, '6v10': False, '6v12': False, '6v1g': False, '6v1t': False, '6v1z': False, '6v39': False, '6v3a': False, '6v3b': False, '6v3d': False, '6v3e': False, '6v41': False, '6v6s': False, '6v8i': False, '6v8w': False, '6v9t': False, '6ve7': False, '6vgs': False, '6vho': False, '6vlz': False, '6vmi': False, '6vu3': False, '6vvh': False, '6vvi': False, '6vwl': False, '6vwm': False, '6vwn': False, '6vyq': False, '6vyr': False, '6vys': False, '6vyt': False, '6vyu': False, '6vyw': False, '6vyx': False, '6vyy': False, '6vyz': False, '6vz2': False, '6vz3': False, '6vz5': False, '6vz7': False, '6vzj': False, '6w19': False, '6w1n': False, '6w1o': False, '6w1p': False, '6w1q': False, '6w1r': False, '6w1t': False, '6w1u': False, '6w1v': False, '6w2s': False, '6w2t': False, '6w6p': False, '6wat': False, '6wd0': False, '6wd1': False, '6wd2': False, '6wd3': False, '6wd4': False, '6wd5': False, '6wd6': False, '6wd7': False, '6wd8': False, '6wd9': False, '6wda': False, '6wdb': False, '6wdc': False, '6wdd': False, '6wde': False, '6wdf': False, '6wdg': False, '6wdh': False, '6wdi': False, '6wdj': False, '6wdk': False, '6wdl': False, '6wdm': False, '6wet': False, '6weu': False, '6wev': False, '6wew': False, '6wfj': False, '6wft': False, '6wfu': False, '6wh3': False, '6wh7': False, '6wjd': False, '6wks': False, '6wkv': False, '6wkx': False, '6wl7': False, '6wl8': False, '6wl9': False, '6wla': False, '6wnv': False, '6wnw': False, '6woo': False, '6wot': False, '6wou': False, '6wov': False, '6ws0': False, '6ws5': False, '6wxe': False, '6wxf': False, '6wxg': False, '6x2i': False, '6x2k': False, '6x32': False, '6x33': False, '6x35': False, '6x36': False, '6x5i': False, '6x62': False, '6x63': False, '6x64': False, '6x65': False, '6x66': False, '6x6g': False, '6x6h': False, '6x6i': False, '6x6j': False, '6x6k': False, '6x6l': False, '6x6s': False, '6x6t': False, '6x6y': False, '6x7a': False, '6x7f': False, '6x7k': 
False, '6x89': False, '6x9q': False, '6xa1': False, '6xdq': False, '6xdr': False, '6xgf': False, '6xhv': False, '6xhw': False, '6xhx': False, '6xhy': False, '6xi1': False, '6xi3': False, '6xii': False, '6xij': False, '6xir': False, '6xlb': False, '6xnr': False, '6xqd': False, '6xqe': False, '6xr8': False, '6xtj': False, '6xu6': False, '6xu7': False, '6xu8': False, '6xv5': False, '6xvx': False, '6xw0': False, '6xwk': False, '6xwz': False, '6xxf': False, '6xxg': False, '6xxx': False, '6xy3': False, '6xy7': False, '6xyj': False, '6xyw': False, '6xza': False, '6xzb': False, '6xzd': False, '6xzg': False, '6xzp': False, '6xzr': False, '6y01': False, '6y0c': False, '6y0g': False, '6y0k': False, '6y0r': False, '6y0s': False, '6y25': False, '6y2i': False, '6y2j': False, '6y2l': False, '6y2m': False, '6y2t': False, '6y33': False, '6y34': False, '6y3d': False, '6y3q': False, '6y4a': False, '6y57': False, '6y5z': False, '6y60': False, '6y61': False, '6y63': False, '6y65': False, '6y66': False, '6y67': False, '6y69': False, '6y6x': False, '6y7m': False, '6y7q': False, '6y8g': False, '6y8k': False, '6y8x': False, '6y9i': False, '6y9k': False, '6yab': False, '6yaj': False, '6yak': False, '6yam': False, '6yax': False, '6yb4': False, '6yd2': False, '6yd3': False, '6ydp': False, '6ydv': False, '6ydw': False, '6yef': False, '6yf7': False, '6yf9': False, '6yfa': False, '6yfb': False, '6yfc': False, '6yfd': False, '6yfe': False, '6yff': False, '6yfg': False, '6yfh': False, '6yfj': False, '6yfk': False, '6yfl': False, '6yfm': False, '6yfn': False, '6yfo': False, '6yfp': False, '6yfq': False, '6yfr': False, '6yfs': False, '6yft': False, '6yfu': False, '6ygt': False, '6yh0': False, '6yi0': False, '6yi4': False, '6yjq': False, '6yjr': False, '6yjs': False, '6yjt': False, '6yju': False, '6yjv': False, '6ykg': False, '6yl3': False, '6ylf': False, '6ylg': False, '6ylh': False, '6ylx': False, '6yly': False, '6ylz': False, '6ymh': False, '6ymn': False, '6ymq': False, '6yn6': False, '6ynx': 
False, '6yny': False, '6ynz': False, '6yo0': False, '6yo5': False, '6yoz': False, '6yp1': False, '6ypr': False, '6ypx': False, '6ypz': False, '6yq0': False, '6yq3': False, '6yq6': False, '6yq9': False, '6yqa': False, '6yqb': False, '6yqc': False, '6yqd': False, '6yqh': False, '6yqm': False, '6yqr': False, '6yqs': False, '6yr1': False, '6yr2': False, '6yru': False, '6yrv': False, '6yrx': False, '6yrz': False, '6ys1': False, '6ys2': False, '6ysr': False, '6yss': False, '6yst': False, '6ysu': False, '6ytj': False, '6ytn': False, '6ytp': False, '6ytr': False, '6yul': False, '6yum': False, '6yut': False, '6yv3': False, '6yve': False, '6yvp': False, '6yvr': False, '6yw5': False, '6ywe': False, '6yws': False, '6ywv': False, '6ywx': False, '6ywy': False, '6yx7': False, '6yx8': False, '6yxl': False, '6yxm': False, '6yxx': False, '6yxy': False, '6yxz': False, '6yy5': False, '6yyn': False, '6yyo': False, '6yyp': False, '6yyq': False, '6yyr': False, '6yz3': False, '6yz7': False, '6yzd': False, '6yzf': False, '6yzy': False, '6z07': False, '6z0u': False, '6z14': False, '6z17': False, '6z1p': False, '6z1q': False, '6z1t': False, '6z2e': False, '6z2n': False, '6z33': False, '6z39': False, '6z3c': False, '6z3i': False, '6z3n': False, '6z3o': False, '6z3v': False, '6z4g': False, '6z4h': False, '6z4i': False, '6z4j': False, '6z4k': False, '6z4l': False, '6z4m': False, '6z4n': False, '6z4o': False, '6z4q': False, '6z4s': False, '6z4v': False, '6z5f': False, '6z5h': False, '6z5k': False, '6z5o': False, '6z5v': False, '6z66': False, '6z6b': False, '6z6c': False, '6z6j': False, '6z6k': False, '6z6l': False, '6z6m': False, '6z6n': False, '6z6s': False, '6z7f': False, '6z7g': False, '6z7l': False, '6z7m': False, '6z83': False, '6z84': False, '6z8n': False, '6z8p': False, '6za8': False, '6zb0': False, '6zb1': False, '6zb2': False, '6zb3': False, '6zba': False, '6zbp': False, '6zdk': False, '6zdl': False, '6zdm': False, '6zdw': False, '6zed': False, '6zel': False, '6zet': False, '6zeu': 
False, '6zev': False, '6zf9': False, '6zfa': False, '6zfn': False, '6zfq': False, '6zh9': False, '6zhc': False, '6zi0': False, '6zi1': False, '6zib': False, '6zic': False, '6zin': False, '6ziv': False, '6zj1': False, '6zj3': False, '6zj4': False, '6zj5': False, '6zj6': False, '6zj7': False, '6zjg': False, '6zjh': False, '6zji': False, '6zk0': False, '6zk7': False, '6zlm': False, '6zlo': False, '6zlr': False, '6zm0': False, '6zm3': False, '6zm5': False, '6zm6': False, '6zm7': False, '6zme': False, '6zmi': False, '6zmo': False, '6zmw': False, '6zmz': False, '6zn1': False, '6zna': False, '6znb': False, '6zny': False, '6znz': False, '6zo0': False, '6zo1': False, '6zo2': False, '6zo3': False, '6zon': False, '6zp4': False, '6zps': False, '6zpv': False, '6zpw': False, '6zpx': False, '6zpy': False, '6zpz': False, '6zq0': False, '6zq1': False, '6zqa': False, '6zqb': False, '6zqc': False, '6zqd': False, '6zqe': False, '6zqf': False, '6zqg': False, '6zr7': False, '6zrx': False, '6zry': False, '6zrz': False, '6zs0': False, '6zs8': False, '6zs9': False, '6zsa': False, '6zsb': False, '6zsc': False, '6zsd': False, '6zse': False, '6zsg': False, '6zsp': False, '6zsq': False, '6zsr': False, '6zt0': False, '6ztj': False, '6ztl': False, '6ztm': False, '6ztn': False, '6zto': False, '6ztp': False, '6zu1': False, '6zu2': False, '6zu5': False, '6zuj': False, '6zv5': False, '6zvj': False, '6zvk': False, '6zvm': False, '6zvn': False, '6zvr': False, '6zvs': False, '6zvt': False, '6zw4': False, '6zw5': False, '6zw6': False, '6zw7': False, '6zwv': False, '6zyt': False, '6zz5': False, '6zzc': False, '6zzh': False, '6zzr': False, '7a01': False, '7a09': False, '7a0d': False, '7a0e': False, '7a0f': False, '7a0g': False, '7a0j': False, '7a26': False, '7a27': False, '7a4f': False, '7a4g': False, '7a4h': False, '7a4i': False, '7a4j': False, '7a5a': False, '7a5f': False, '7a5g': False, '7a5h': False, '7a5i': False, '7a5j': False, '7a5k': False, '7a6w': False, '7a6x': False, '7a7d': False, '7a8w': 
False, '7a8y': False, '7a9c': False, '7a9g': False, '7a9h': False, '7a9u': False, '7a9y': False, '7a9z': False, '7aa0': False, '7aa1': False, '7aae': False, '7aai': False, '7abf': False, '7abg': False, '7abz': False, '7ac0': False, '7ac7': False, '7acj': False, '7acr': False, '7adz': False, '7ae0': False, '7ae4': False, '7aeb': False, '7aef': False, '7aek': False, '7ael': False, '7aet': False, '7aeu': False, '7aev': False, '7aew': False, '7aey': False, '7af2': False, '7agx': False, '7ah2': False, '7ah9': False, '7ahi': False, '7ahp': False, '7ahw': False, '7ahy': False, '7ahz': False, '7ai0': False, '7ai1': False, '7aia': False, '7aih': False, '7ajb': False, '7ajc': False, '7ajd': False, '7aje': False, '7ajf': False, '7ajg': False, '7ajh': False, '7aji': False, '7ajj': False, '7ajk': False, '7ajl': False, '7ajr': False, '7ajt': False, '7aju': False, '7ak4': False, '7akr': False, '7aks': False, '7all': False, '7alx': False, '7aly': False, '7am2': False, '7amx': False, '7an1': False, '7ana': False, '7anb': False, '7ane': False, '7anm': False, '7anu': False, '7aoi': False, '7aor': False, '7ap5': False, '7aph': False, '7ar4': False, '7arq': False, '7as4': False, '7asd': False, '7ase': False, '7aso': False, '7asp': False, '7ata': False, '7ath': False, '7atk': False, '7atl': False, '7av4': False, '7av5': False, '7av8': False, '7av9': False, '7avc': False, '7ax3': False, '7ayy': False, '7ayz': False, '7az0': False, '7azo': False, '7azs': False, '7b04': False, '7b0m': False, '7b0n': False, '7b0u': False, '7b1y': False, '7b20': False, '7b21': False, '7b23': False, '7b24': False, '7b25': False, '7b27': False, '7b2t': False, '7b4i': False, '7b4o': False, '7b4p': False, '7b58': False, '7b59': False, '7b5a': False, '7b5h': False, '7b5i': False, '7b5k': False, '7b69': False, '7b6a': False, '7b6c': False, '7b6g': False, '7b6s': False, '7b6t': False, '7b6u': False, '7b6v': False, '7b6w': False, '7b74': False, '7b7d': False, '7b9v': False, '7b9w': False, '7bay': False, '7bbo': 
False, '7bbp': False, '7bdz': False, '7be0': False, '7be1': False, '7be2': False, '7beb': False, '7bec': False, '7bf1': False, '7bf2': False, '7bf7': False, '7bf8': False, '7bf9': False, '7bfa': False, '7bg5': False, '7bga': False, '7bgl': False, '7bgo': False, '7bgx': False, '7bgz': False, '7bh0': False, '7bhp': False, '7bij': False, '7bk5': False, '7bk6': False, '7bk7': False, '7bkx': False, '7bl1': False, '7bl2': False, '7bl3': False, '7blh': False, '7bn2': False, '7bnm': False, '7bns': False, '7bnu': False, '7bo7': False, '7bsi': False, '7bt6': False, '7btb': False, '7bw6': False, '7bzw': False, '7bzy': False, '7cbm': False, '7cf9': False, '7cgb': False, '7cgo': False, '7ckb': False, '7ckc': False, '7coy': False, '7cpj': False, '7cpu': False, '7cpv': False, '7cr8': False, '7d4i': False, '7d5s': False, '7d5t': False, '7d63': False, '7d6z': False, '7d80': False, '7dco': False, '7dgq': False, '7dgr': False, '7dgs': False, '7dkf': False, '7dr2': False, '7drb': False, '7dwx': False, '7e80': False, '7e81': False, '7e82': False, '7eaj': False, '7egb': False, '7egc': False, '7elh': False, '7ena': False, '7enc': False, '7eq9': False, '7etj': False, '7eto': False, '7evf': False, '7ew5': False, '7ext': False, '7ey0': False, '7ey4': False, '7eyd': False, '7ezx': False, '7f25': False, '7f2n': False, '7f4v': False, '7f5s': False, '7f8i': False, '7f9o': False, '7fb1': False, '7fcf': False, '7ff7': False, '7ffe': False, '7fff': False, '7ffl': False, '7ffq': False, '7fik': False, '7fix': False, '7fj1': False, '7fj3': False, '7jgl': False, '7jgm': False, '7jgn': False, '7jil': False, '7jjj': False, '7jk9': False, '7jmf': False, '7jmg': False, '7jmh': False, '7jmi': False, '7jmj': False, '7joq': False, '7jql': False, '7jqm': False, '7jqu': False, '7jss': False, '7jsw': False, '7jsz': False, '7jt1': False, '7jt2': False, '7jt3': False, '7ju4': False, '7k00': False, '7k0s': False, '7k0t': False, '7k22': False, '7k23': False, '7k24': False, '7k50': False, '7k51': False, '7k52': 
False, '7k53': False, '7k54': False, '7k55': False, '7k58': False, '7k5b': False, '7k6v': False, '7kek': False, '7kgb': False, '7kh1': False, '7kip': False, '7kjk': False, '7kln': False, '7kp5': False, '7kr6': False, '7kts': False, '7kv2': False, '7kv3': False, '7kv4': False, '7kv5': False, '7kv6': False, '7kv7': False, '7kwc': False, '7kzm': False, '7kzo': False, '7kzp': False, '7kzq': False, '7kzr': False, '7kzs': False, '7kzt': False, '7kzv': False, '7l08': False, '7l0u': False, '7l0v': False, '7l0w': False, '7l0x': False, '7l0y': False, '7l18': False, '7l20': False, '7l2y': False, '7l5q': False, '7l5u': False, '7l6a': False, '7l6b': False, '7l6e': False, '7l6f': False, '7l6h': False, '7l6i': False, '7l7x': False, '7l7y': False, '7l7z': False, '7l81': False, '7l82': False, '7l8f': False, '7lbm': False, '7lh5': False, '7lhd': False, '7lki': False, '7lmg': False, '7lmh': False, '7lmi': False, '7lmj': False, '7lnk': False, '7lqe': False, '7lqf': False, '7lqg': False, '7lqh': False, '7lqi': False, '7ls1': False, '7ls2': False, '7ltm': False, '7lu7': False, '7lv0': False, '7lvk': False, '7m1a': False, '7m1b': False, '7m2t': False, '7m2v': False, '7m3l': False, '7m3r': False, '7m3t': False, '7m4w': False, '7m4x': False, '7m4y': False, '7m4z': False, '7m50': False, '7m54': False, '7m57': False, '7m5d': False, '7m6a': False, '7m6l': False, '7md2': False, '7md3': False, '7md7': False, '7mdz': False, '7mf0': False, '7mfj': False, '7mfm': False, '7mfn': False, '7mfs': False, '7mhe': False, '7miz': False, '7mkr': False, '7mks': False, '7mly': False, '7moq': False, '7mpi': False, '7mpj': False, '7mq8': False, '7mq9': False, '7mqa': False, '7mqt': False, '7msc': False, '7msh': False, '7msm': False, '7msz': False, '7mt0': False, '7mt2': False, '7mt3': False, '7mt7': False, '7mtg': False, '7mtp': False, '7mtw': False, '7mtz': False, '7mua': False, '7muc': False, '7mud': False, '7mue': False, '7muq': False, '7mus': False, '7muv': False, '7muw': False, '7muy': False, '7mx6': 
False, '7mxz': False, '7my1': False, '7my7': False, '7myd': False, '7n1h': False, '7n1i': False, '7n1p': False, '7n2c': False, '7n2u': False, '7n2v': False, '7n30': False, '7n31': False, '7n3m': False, '7n50': False, '7n51': False, '7n52': False, '7n61': False, '7n65': False, '7n6g': False, '7n7x': False, '7n85': False, '7n8b': False, '7n9f': False, '7n9x': False, '7na6': False, '7nac': False, '7nb1': False, '7nb2': False, '7nb3': False, '7nbc': False, '7nbd': False, '7nbf': False, '7nbg': False, '7nbh': False, '7nbr': False, '7nbs': False, '7nbt': False, '7nbu': False, '7ncx': False, '7ndq': False, '7ndt': False, '7ndu': False, '7ne9': False, '7neo': False, '7nfv': False, '7nfx': False, '7ng8': False, '7nh6': False, '7nh8': False, '7nhk': False, '7nhl': False, '7nhm': False, '7nhn': False, '7nkt': False, '7nlv': False, '7nm7': False, '7nm8': False, '7nmc': False, '7nmo': False, '7nmr': False, '7nmt': False, '7nmu': False, '7nmv': False, '7nmy': False, '7nmz': False, '7nn5': False, '7np7': False, '7npm': False, '7npn': False, '7npr': False, '7nps': False, '7npt': False, '7npu': False, '7npv': False, '7npy': False, '7npz': False, '7nq0': False, '7nq1': False, '7nq2': False, '7nq3': False, '7nq5': False, '7nq7': False, '7nq9': False, '7nqb': False, '7nqg': False, '7nqh': False, '7nql': False, '7nr2': False, '7nra': False, '7nrc': False, '7nrd': False, '7nrj': False, '7nrr': False, '7ns0': False, '7ns3': False, '7nsh': False, '7nsi': False, '7nsj': False, '7nso': False, '7nsp': False, '7nsq': False, '7nsw': False, '7nsx': False, '7nsy': False, '7nsz': False, '7nt0': False, '7ntd': False, '7nvg': False, '7nvj': False, '7nvk': False, '7nvp': False, '7nw1': False, '7nwn': False, '7nwo': False, '7nwp': False, '7nwq': False, '7nwt': False, '7nwv': False, '7nww': False, '7nwy': False, '7nxl': False, '7nxz': False, '7nyd': False, '7nyk': False, '7nyl': False, '7nym': False, '7nyn': False, '7nyo': False, '7nz2': False, '7nz3': False, '7nz4': False, '7nzb': False, '7nzc': 
False, '7nzd': False, '7nze': False, '7nzf': False, '7nzh': False, '7nzo': False, '7nzp': False, '7nzq': False, '7o00': False, '7o0u': False, '7o0v': False, '7o0w': False, '7o0x': False, '7o18': False, '7o19': False, '7o1a': False, '7o1c': False, '7o2t': False, '7o2u': False, '7o2y': False, '7o42': False, '7o4a': False, '7o5b': False, '7o5n': False, '7o5q': False, '7o5t': False, '7o5v': False, '7o5w': False, '7o6x': False, '7o76': False, '7o7y': False, '7o7z': False, '7o80': False, '7o81': False, '7o87': False, '7o9k': False, '7o9m': False, '7o9n': False, '7o9r': False, '7oa0': False, '7oa1': False, '7oa4': False, '7oao': False, '7oap': False, '7oaq': False, '7oau': False, '7oay': False, '7obo': False, '7obr': False, '7obw': False, '7ocj': False, '7od0': False, '7odj': False, '7odk': False, '7odm': False, '7odr': False, '7ods': False, '7odt': False, '7odv': False, '7oe4': False, '7oe5': False, '7oe6': False, '7oeo': False, '7oep': False, '7oer': False, '7oes': False, '7oet': False, '7of1': False, '7of2': False, '7of3': False, '7of4': False, '7of5': False, '7of6': False, '7of7': False, '7ofu': False, '7og1': False, '7og4': False, '7ogo': False, '7ogq': False, '7ogu': False, '7ogy': False, '7ogz': False, '7oh3': False, '7oho': False, '7ohp': False, '7ohq': False, '7ohr': False, '7ohs': False, '7oht': False, '7ohu': False, '7ohv': False, '7ohw': False, '7ohx': False, '7ohy': False, '7oi4': False, '7oic': False, '7oid': False, '7oif': False, '7oig': False, '7oii': False, '7oij': False, '7oil': False, '7oiq': False, '7ois': False, '7oit': False, '7oiz': False, '7oj0': False, '7oj7': False, '7ojf': False, '7ojg': False, '7ojm': False, '7ojo': False, '7ok6': False, '7ok8': False, '7oko': False, '7okr': False, '7okz': False, '7olc': False, '7old': False, '7olj': False, '7oln': False, '7olu': False, '7olw': False, '7om1': False, '7omc': False, '7omi': False, '7oms': False, '7omu': False, '7omv': False, '7omw': False, '7omx': False, '7omy': False, '7omz': False, '7on0': 
False, '7onp': False, '7oqd': False, '7oqv': False, '7or6': False, '7ord': False, '7orp': False, '7orq': False, '7orx': False, '7os3': False, '7os5': False, '7os6': False, '7os9': False, '7osa': False, '7osm': False, '7ost': False, '7ot5': False, '7otc': False, '7ou1': False, '7ou6': False, '7ou8': False, '7ouc': False, '7oud': False, '7ouj': False, '7ouo': False, '7ova': False, '7ovw': False, '7ow7': False, '7oxp': False, '7oxr': False, '7oya': False, '7oyb': False, '7oyc': False, '7oyd': False, '7oyk': False, '7oz6': False, '7oz8': False, '7oz9': False, '7oza': False, '7ozb': False, '7ozc': False, '7ozd': False, '7oze': False, '7ozf': False, '7ozt': False, '7ozy': False, '7p0k': False, '7p1n': False, '7p1p': False, '7p1t': False, '7p1y': False, '7p24': False, '7p26': False, '7p2e': False, '7p2z': False, '7p30': False, '7p32': False, '7p35': False, '7p3k': False, '7p48': False, '7p4c': False, '7p4d': False, '7p4r': False, '7p4y': False, '7p50': False, '7p5r': False, '7p5u': False, '7p5x': False, '7p5z': False, '7p6f': False, '7p6v': False, '7p6w': False, '7p6y': False, '7p6z': False, '7p75': False, '7p7i': False, '7p7n': False, '7p7o': False, '7p7q': False, '7p7r': False, '7p7s': False, '7p7t': False, '7p7u': False, '7p7w': False, '7p97': False, '7p9l': False, '7p9p': False, '7p9y': False, '7pa1': False, '7pa2': False, '7pa3': False, '7pa6': False, '7pa7': False, '7pa8': False, '7pa9': False, '7paa': False, '7pah': False, '7pai': False, '7paj': False, '7pak': False, '7pal': False, '7pam': False, '7pan': False, '7pao': False, '7paq': False, '7par': False, '7pas': False, '7pb3': False, '7pbc': False, '7pbd': False, '7pbj': False, '7pbu': False, '7pbw': False, '7pbx': False, '7pbz': False, '7pc0': False, '7pd3': False, '7pd6': False, '7pdb': False, '7pdr': False, '7pdw': False, '7pdx': False, '7pe1': False, '7pe2': False, '7pe5': False, '7pe6': False, '7peh': False, '7pei': False, '7pep': False, '7peq': False, '7pfn': False, '7pfo': False, '7pgo': False, '7pgx': 
False, '7pgy': False, '7pgz': False, '7ph0': False, '7ph9': False, '7pha': False, '7phb': False, '7phc': False, '7phm': False, '7pi1': False, '7pi4': False, '7pi5': False, '7pi8': False, '7pi9': False, '7pia': False, '7pib': False, '7pic': False, '7pil': False, '7pin': False, '7pio': False, '7pip': False, '7piq': False, '7pir': False, '7pis': False, '7pit': False, '7piw': False, '7pj3': False, '7pj4': False, '7pj5': False, '7pj6': False, '7pjo': False, '7pjs': False, '7pjt': False, '7pju': False, '7pjv': False, '7pjw': False, '7pjx': False, '7pjy': False, '7pjz': False, '7pk6': False, '7pkc': False, '7pkr': False, '7pks': False, '7pkt': False, '7pky': False, '7pkz': False, '7pl5': False, '7plf': False, '7plo': False, '7plr': False, '7pmk': False, '7pmn': False, '7pmo': False, '7pmz': False, '7pnh': False, '7pni': False, '7pnk': False, '7pnt': False, '7pnu': False, '7pnv': False, '7pnw': False, '7pnx': False, '7pny': False, '7pnz': False, '7po0': False, '7po1': False, '7po2': False, '7po3': False, '7po4': False, '7pox': False, '7pp1': False, '7ppx': False, '7pq2': False, '7pq3': False, '7pq6': False, '7pq9': False, '7pqa': False, '7pqd': False, '7pqh': False, '7pqn': False, '7pqo': False, '7pr6': False, '7prg': False, '7pri': False, '7prp': False, '7prs': False, '7psa': False, '7pse': False, '7psf': False, '7pt6': False, '7pt7': False, '7ptz': False, '7pu1': False, '7pua': False, '7pub': False, '7pus': False, '7pvi': False, '7pwb': False, '7pwg': False, '7pwh': False, '7pwi': False, '7pwj': False, '7pwo': False, '7pwx': False, '7px5': False, '7px9': False, '7pxb': False, '7pxc': False, '7pxd': False, '7pxo': False, '7pz1': False, '7pzg': False, '7pzh': False, '7pzy': False, '7q08': False, '7q0f': False, '7q0p': False, '7q0r': False, '7q14': False, '7q17': False, '7q18': False, '7q19': False, '7q1i': False, '7q1j': False, '7q1w': False, '7q20': False, '7q2n': False, '7q2o': False, '7q2p': False, '7q2t': False, '7q2u': False, '7q2w': False, '7q30': False, '7q31': 
False, '7q32': False, '7q39': False, '7q3c': False, '7q3i': False, '7q4k': False, '7q4s': False, '7q4t': False, '7q4u': False, '7q52': False, '7q5c': False, '7q5i': False, '7q5p': False, '7q5r': False, '7q5s': False, '7q5t': False, '7q5u': False, '7q5w': False, '7q63': False, '7q6h': False, '7q7p': False, '7q7q': False, '7q8d': False, '7q8f': False, '7q8g': False, '7q8h': False, '7q8i': False, '7q8j': False, '7q8k': False, '7q8l': False, '7q8m': False, '7q8n': False, '7q8o': False, '7q8p': False, '7q8q': False, '7q91': False, '7q92': False, '7q93': False, '7q9b': False, '7q9c': False, '7q9h': False, '7q9q': False, '7q9r': False, '7q9s': False, '7q9u': False, '7q9x': False, '7q9z': False, '7qan': False, '7qb1': False, '7qb5': False, '7qbh': False, '7qc6': False, '7qc7': False, '7qc8': False, '7qca': False, '7qcq': False, '7qdf': False, '7qdl': False, '7qe3': False, '7qe4': False, '7qel': False, '7qep': False, '7qf2': False, '7qf3': False, '7qf4': False, '7qf5': False, '7qf9': False, '7qfc': False, '7qff': False, '7qfh': False, '7qg8': False, '7qg9': False, '7qgf': False, '7qgg': False, '7qgh': False, '7qgn': False, '7qgq': False, '7qgr': False, '7qgu': False, '7qh4': False, '7qhi': False, '7qhj': False, '7qhk': False, '7qhs': False, '7qhv': False, '7qhw': False, '7qi4': False, '7qij': False, '7qj5': False, '7qj7': False, '7qj8': False, '7qja': False, '7qjb': False, '7qjc': False, '7qjd': False, '7qjh': False, '7qjk': False, '7ql8': False, '7qlg': False, '7qlq': False, '7qls': False, '7qnf': False, '7qnh': False, '7qni': False, '7qnj': False, '7qnp': False, '7qns': False, '7qnv': False, '7qo2': False, '7qo5': False, '7qo6': False, '7qob': False, '7qon': False, '7qor': False, '7qp6': False, '7qp7': False, '7qpk': False, '7qpw': False, '7qq0': False, '7qq8': False, '7qrh': False, '7qrk': False, '7qse': False, '7qsf': False, '7qsi': False, '7qsr': False, '7qtc': False, '7qu7': False, '7qub': False, '7quk': False, '7qum': False, '7qup': False, '7quw': False, '7quz': 
False, '7qv1': False, '7qv2': False, '7qv3': False, '7qvk': False, '7qvp': False, '7qvr': False, '7qvw': False, '7qw3': False, '7qwh': False, '7qwi': False, '7qwo': False, '7qwq': False, '7qwr': False, '7qws': False, '7qx1': False, '7qxc': False, '7qxd': False, '7qxe': False, '7qxn': False, '7qxp': False, '7qxt': False, '7qxu': False, '7qxw': False, '7qxx': False, '7qy6': False, '7qya': False, '7qyb': False, '7qyh': False, '7qyk': False, '7qyl': False, '7qym': False, '7qyx': False, '7qyy': False, '7qzm': False, '7qzx': False, '7qzy': False, '7qzz': False, '7r00': False, '7r05': False, '7r0p': False, '7r0y': False, '7r1g': False, '7r1n': False, '7r1o': False, '7r1p': False, '7r1q': False, '7r1x': False, '7r2y': False, '7r2z': False, '7r30': False, '7r31': False, '7r32': False, '7r3d': False, '7r3n': False, '7r55': False, '7r5c': False, '7r5j': False, '7r5k': False, '7r5t': False, '7r5x': False, '7r6j': False, '7r7a': False, '7r81': False, '7r8u': False, '7rd1': False, '7rd2': False, '7rev': False, '7rf1': False, '7rf2': False, '7rf3': False, '7rf4': False, '7rf5': False, '7rf6': False, '7rf7': False, '7rf8': False, '7ri8': False, '7rk8': False, '7rk9': False, '7rl0': False, '7rl1': False, '7rl5': False, '7rmc': False, '7rmf': False, '7rmk': False, '7rmo': False, '7rnl': False, '7rnr': False, '7rq8': False, '7rq9': False, '7rqa': False, '7rqb': False, '7rqc': False, '7rqd': False, '7rqe': False, '7rr5': False, '7rro': False, '7rs5': False, '7rs6': False, '7rsx': False, '7rsy': False, '7rsz': False, '7rte': False, '7rti': False, '7rwl': False, '7rwt': False, '7rxy': False, '7ry5': False, '7ryd': False, '7ryf': False, '7ryg': False, '7ryh': False, '7ryj': False, '7ryk': False, '7rz0': False, '7rz2': False, '7s0p': False, '7s0s': False, '7s1g': False, '7s1h': False, '7s1i': False, '7s1j': False, '7s1k': False, '7s1w': False, '7s64': False, '7s78': False, '7s9v': False, '7sa4': False, '7sbq': False, '7sc7': False, '7sc8': False, '7sc9': False, '7sca': False, '7scb': 
False, '7scc': False, '7sfd': False, '7sfr': False, '7sn4': False, '7sn9': False, '7soe': False, '7sof': False, '7som': False, '7sp4': False, '7spb': False, '7spc': False, '7spi': False, '7spj': False, '7spk': False, '7spu': False, '7sqc': False, '7sqd': False, '7sqf': False, '7sqh': False, '7sqq': False, '7sqt': False, '7ss9': False, '7ssd': False, '7ssh': False, '7ssl': False, '7ssn': False, '7sso': False, '7ssw': False, '7st2': False, '7st3': False, '7st6': False, '7st7': False, '7stg': False, '7su4': False, '7su7': False, '7suk': False, '7sxn': False, '7sxr': False, '7sxx': False, '7sxz': False, '7sy1': False, '7sy7': False, '7t0w': False, '7t0z': False, '7t1w': False, '7t1x': False, '7t3p': False, '7t3q': False, '7t3r': False, '7t3t': False, '7t64': False, '7t65': False, '7t66': False, '7t68': False, '7t6w': False, '7t73': False, '7t74': False, '7t75': False, '7t76': False, '7t77': False, '7t7c': False, '7t81': False, '7t8n': False, '7t8o': False, '7t8v': False, '7t9a': False, '7t9b': False, '7t9e': False, '7tau': False, '7tbi': False, '7tbj': False, '7tbk': False, '7tbl': False, '7tbm': False, '7tcv': False, '7td9': False, '7tdg': False, '7tdh': False, '7tdi': False, '7tdj': False, '7tdk': False, '7tdv': False, '7tdz': False, '7tei': False, '7ten': False, '7tf6': False, '7tf7': False, '7tfa': False, '7tfb': False, '7tfc': False, '7tfd': False, '7tfo': False, '7tgh': False, '7thr': False, '7ti4': False, '7ti5': False, '7tjs': False, '7tjt': False, '7tjv': False, '7tjy': False, '7tk0': False, '7tk3': False, '7tk6': False, '7tkm': False, '7tkq': False, '7tms': False, '7tmt': False, '7tnb': False, '7tnq': False, '7tns': False, '7tnt': False, '7tok': False, '7too': False, '7top': False, '7toq': False, '7tor': False, '7tos': False, '7tr6': False, '7tr8': False, '7tr9': False, '7tra': False, '7tw2': False, '7tzc': False, '7u05': False, '7u06': False, '7u0h': False, '7u0l': False, '7u0p': False, '7u0q': False, '7u1i': False, '7u1j': False, '7u2h': False, '7u2i': 
False, '7u2j': False, '7u4p': False, '7u4t': False, '7u6f': False, '7u71': False, '7u8c': False, '7u8o': False, '7u8p': False, '7u8q': False, '7u8r': False, '7u94': False, '7u95': False, '7u96': False, '7u97': False, '7u9q': False, '7u9r': False, '7u9t': False, '7u9x': False, '7u9z': False, '7ua1': False, '7ua3': False, '7ua4': False, '7ua5': False, '7ua9': False, '7uc3': False, '7ucj': False, '7uck': False, '7ud4': False, '7ueb': False, '7ug6': False, '7ug7': False, '7uhz': False, '7ui0': False, '7ui9': False, '7uif': False, '7uig': False, '7uio': False, '7uli': False, '7ums': False, '7umt': False, '7un1': False, '7unc': False, '7und': False, '7une': False, '7unf': False, '7ung': False, '7unr': False, '7unu': False, '7unv': False, '7unw': False, '7uom': False, '7uoo': False, '7uph': False, '7uqb': False, '7uqz': False, '7usa': False, '7uti': False, '7utl': False, '7uvl': False, '7uw9': False, '7uwa': False, '7uwb': False, '7uwc': False, '7uwd': False, '7uxh': False, '7uxx': False, '7uxz': False, '7v08': False, '7v0k': False, '7v0t': False, '7v3u': False, '7v3v': False, '7v7h': False, '7v7i': False, '7v7j': False, '7v93': False, '7va9': False, '7vai': False, '7vaj': False, '7vak': False, '7val': False, '7vam': False, '7van': False, '7vao': False, '7vap': False, '7vaq': False, '7var': False, '7vas': False, '7vat': False, '7vau': False, '7vav': False, '7vaw': False, '7vax': False, '7vay': False, '7vb0': False, '7vb9': False, '7vba': False, '7vbb': False, '7vbc': False, '7vci': False, '7vd5': False, '7vd6': False, '7vea': False, '7vml': False, '7vmm': False, '7vmn': False, '7vmo': False, '7vmp': False, '7vmq': False, '7vmr': False, '7vms': False, '7vop': False, '7vor': False, '7vot': False, '7vrt': False, '7vs5': False, '7vu9': False, '7vub': False, '7vul': False, '7vum': False, '7vw7': False, '7vwy': False, '7vy2': False, '7w1c': False, '7w1d': False, '7w1e': False, '7w1y': False, '7w37': False, '7w38': False, '7w39': False, '7w3a': False, '7w3b': False, '7w3c': 
False, '7w3f': False, '7w3g': False, '7w3h': False, '7w3i': False, '7w3j': False, '7w3k': False, '7w3m': False, '7w5b': False, '7w5z': False, '7w85': False, '7w8g': False, '7wfd': False, '7wfe': False, '7wg5': False, '7wjw': False, '7wns': False, '7wot': False, '7wpr': False, '7wps': False, '7wqo': False, '7wqt': False, '7wtl': False, '7wtm': False, '7wtn': False, '7wto': False, '7wtp': False, '7wtq': False, '7wtr': False, '7wvh': False, '7wz8': False, '7x2g': False, '7x37': False, '7x38': False, '7x3f': False, '7x4k': False, '7x5a': False, '7x5t': False, '7x7q': False, '7xi9': False, '7xib': False, '7xm1': False, '7xn7': False, '7xse': False, '7xsx': False, '7xsz': False, '7xt7': False, '7xtd': False, '7xti': False, '7y4l': False, '7y5e': False, '7y6s': False, '7y7a': False, '7ycx': False, '7yfz': False, '7yla': False, '7yvq': False, '7ywt': False, '7yyp': False, '7yzh': False, '7yzs': False, '7z0n': False, '7z0p': False, '7z13': False, '7z1d': False, '7z1e': False, '7z1l': False, '7z1m': False, '7z1o': False, '7z1r': False, '7z34': False, '7z37': False, '7z3a': False, '7z3k': False, '7z3l': False, '7z3y': False, '7z43': False, '7z49': False, '7z4b': False, '7z4o': False, '7z55': False, '7z56': False, '7z5d': False, '7z5e': False, '7z5f': False, '7z5o': False, '7z5s': False, '7z5t': False, '7z62': False, '7z63': False, '7z66': False, '7z6t': False, '7z89': False, '7z8a': False, '7z8f': False, '7z9a': False, '7z9b': False, '7z9h': False, '7z9i': False, '7z9j': False, '7z9n': False, '7z9o': False, '7z9s': False, '7z9u': False, '7z9w': False, '7z9y': False, '7za6': False, '7za7': False, '7za8': False, '7za9': False, '7zaa': False, '7zab': False, '7zac': False, '7zad': False, '7zae': False, '7zaf': False, '7zaj': False, '7zal': False, '7zam': False, '7zar': False, '7zat': False, '7zaz': False, '7zb2': False, '7zc0': False, '7ze0': False, '7ze7': False, '7ze9': False, '7zen': False, '7zfn': False, '7zfo': False, '7zfs': False, '7zft': False, '7zfu': False, '7zfw': 
False, '7zfy': False, '7zfz': False, '7zg1': False, '7zg2': False, '7zg8': False, '7zgx': False, '7zgy': False, '7zhj': False, '7zil': False, '7zim': False, '7zin': False, '7zio': False, '7zip': False, '7ziq': False, '7zjw': False, '7zjx': False, '7zn2': False, '7zn4': False, '7zpq': False, '7zq9': False, '7zqb': False, '7zqc': False, '7zqd': False, '7zqn': False, '7zqo': False, '7zra': False, '7zrs': False, '7zs5': False, '7zs6': False, '7zts': False, '7zuf': False, '7zuh': False, '7zui': False, '7zuj': False, '7zuk': False, '7zul': False, '7zus': False, '7zuw': False, '7zux': False, '7zw0': False, '7zwd': False, '7zx0': False, '7zx1': False, '7zx7': False, '7zx8': False, '8a1d': False, '8a1s': False, '8a22': False, '8a2l': False, '8a2m': False, '8a3d': False, '8a3k': False, '8a47': False, '8a5n': False, '8a60': False, '8a9a': False, '8aa5': False, '8aaf': False, '8agb': False, '8agh': False, '8agk': False, '8ago': False, '8agt': False, '8agu': False, '8agv': False, '8agw': False, '8agx': False, '8agz': False, '8ah0': False, '8ah1': False, '8aij': False, '8aiy': False, '8aj4': False, '8aj5': False, '8akn': False, '8alp': False, '8am9': False, '8amt': False, '8anx': False, '8aoz': False, '8ap0': False, '8ap6': False, '8apa': False, '8apb': False, '8apc': False, '8apd': False, '8ape': False, '8apf': False, '8apg': False, '8aph': False, '8apj': False, '8apk': False, '8apn': False, '8ash': False, '8ask': False, '8atj': False, '8atm': False, '8ato': False, '8atu': False, '8atx': False, '8au1': False, '8auk': False, '8auw': False, '8axk': False, '8axl': False, '8axn': False, '8aye': False, '8b0b': False, '8b0c': False, '8b0d': False, '8b0e': False, '8b0f': False, '8b0g': False, '8b0h': False, '8b0x': False, '8b2a': False, '8b2b': False, '8b2c': False, '8b4n': False, '8b5g': False, '8b5h': False, '8b5i': False, '8b5j': False, '8b5l': False, '8b6c': False, '8b7l': False, '8b7o': False, '8b7y': False, '8bac': False, '8bfl': False, '8bfp': False, '8bip': False, '8bjq': 
False, '8bov': False, '8boy': False, '8bp2': False, '8bpx': False, '8bq5': False, '8bqd': False, '8bqx': False, '8br8': False, '8brm': False, '8bsi': False, '8bsj': False, '8btd': False, '8btr': False, '8bxu': False, '8bxv': False, '8bxw': False, '8bxx': False, '8c5c': False, '8cen': False, '8ceo': False, '8ci0': False, '8crx': False, '8cs9': False, '8csl': False, '8csy': False, '8cue': False, '8cvj': False, '8cvk': False, '8cvl': False, '8cvt': False, '8cw4': False, '8cwm': False, '8cxm': False, '8cxp': False, '8d3c': False, '8d3d': False, '8d6x': False, '8d6y': False, '8d9t': False, '8d9u': False, '8d9v': False, '8dbp': False, '8dbq': False, '8dbr': False, '8dbt': False, '8dbu': False, '8dbv': False, '8dbw': False, '8dd3': False, '8de6': False, '8dli': False, '8dlt': False, '8dlu': False, '8dlx': False, '8dlz': False, '8dp8': False, '8dp9': False, '8dpa': False, '8dv6': False, '8dvd': False, '8dwb': False, '8e2d': False, '8e2e': False, '8e2i': False, '8e2k': False, '8e3j': False, '8e3p': False, '8e4i': False, '8e4k': False, '8e4z': False, '8e52': False, '8e53': False, '8e54': False, '8e5u': False, '8e6g': False, '8e73': False, '8eaf': False, '8eag': False, '8eah': False, '8eai': False, '8eaj': False, '8eak': False, '8eal': False, '8eam': False, '8eaq': False, '8ecw': False, '8ed0': False, '8edk': False, '8edx': False, '8eey': False, '8egv': False, '8ehp': False, '8eid': False, '8eiu': False, '8ej4': False, '8ekb': False, '8ekc': False, '8ekf': False, '8ekn': False, '8ele': False, '8emy': False, '8en1': False, '8en4': False, '8en5': False, '8en6': False, '8ep2': False, '8ep9': False, '8epj': False, '8epo': False, '8epr': False, '8eq4': False, '8eq7': False, '8eq9': False, '8eqd': False, '8eqe': False, '8eqx': False, '8er4': False, '8esq': False, '8esr': False, '8eth': False, '8etl': False, '8eto': False, '8eu8': False, '8eud': False, '8eug': False, '8eui': False, '8eur': False, '8eut': False, '8eux': False, '8fbd': False, '8fbf': False, '8fbl': False, '8fjk': 
False, '8fjl': False, '8fnw': False, '8fq4': False, '8fv5': False, '8fwi': False, '8g08': False, '8g09': False, '8g0a': False, '8g0d': False, '8g0e': False, '8g5e': False, '8gms': False, '8gpu': False, '8gwa': False, '8gxq': False, '8gxs': False, '8gxu': False, '8gxw': False, '8gxx': False, '8gxy': False, '8gxz': False, '8h2i': False, '8hdr': False, '8hr7': False, '8ika': False}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Splitting sequences into time frames: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2020</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">01</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">01</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2021</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">01</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">01</span>           <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1391\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1391</span></a>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2023</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">03</span>-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">01</span>                                                            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Splitting sequences into time frames: \u001b[1;36m2020\u001b[0m-\u001b[1;36m01\u001b[0m-\u001b[1;36m01\u001b[0m \u001b[1;36m2021\u001b[0m-\u001b[1;36m01\u001b[0m-\u001b[1;36m01\u001b[0m           \u001b]8;id=8869;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=387515;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1391\u001b\\\u001b[2m1391\u001b[0m\u001b]8;;\u001b\\\n",
       "\u001b[2;36m                    \u001b[0m         \u001b[1;36m2023\u001b[0m-\u001b[1;36m03\u001b[0m-\u001b[1;36m01\u001b[0m                                                            \u001b[2m                \u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/30/23 20:27:08] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Deposition date-derived dataset splits of sizes: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">563693</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">78973</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">137323</span>  <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1361\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1361</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/30/23 20:27:08]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Deposition date-derived dataset splits of sizes: \u001b[1;36m563693\u001b[0m \u001b[1;36m78973\u001b[0m \u001b[1;36m137323\u001b[0m  \u001b]8;id=735435;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=923760;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1361\u001b\\\u001b[2m1361\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Done splitting sequences                                              <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1395\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1395</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Done splitting sequences                                              \u001b]8;id=760840;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=848347;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1395\u001b\\\u001b[2m1395\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100d_A</td>\n",
       "      <td>100d</td>\n",
       "      <td>A</td>\n",
       "      <td>10</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...</td>\n",
       "      <td>CCGGCGCCGG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[SPM]</td>\n",
       "      <td></td>\n",
       "      <td>1.90</td>\n",
       "      <td>1994-12-05</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100d_B</td>\n",
       "      <td>100d</td>\n",
       "      <td>B</td>\n",
       "      <td>10</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...</td>\n",
       "      <td>CCGGCGCCGG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[SPM]</td>\n",
       "      <td></td>\n",
       "      <td>1.90</td>\n",
       "      <td>1994-12-05</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>101d_A</td>\n",
       "      <td>101d</td>\n",
       "      <td>A</td>\n",
       "      <td>12</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...</td>\n",
       "      <td>CGCGAATTCGCG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[CBR, MG, NT]</td>\n",
       "      <td></td>\n",
       "      <td>2.25</td>\n",
       "      <td>1994-12-14</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>101d_B</td>\n",
       "      <td>101d</td>\n",
       "      <td>B</td>\n",
       "      <td>12</td>\n",
       "      <td>na</td>\n",
       "      <td>DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...</td>\n",
       "      <td>CGCGAATTCGCG</td>\n",
       "      <td>N/A</td>\n",
       "      <td>2</td>\n",
       "      <td>[CBR, MG, NT]</td>\n",
       "      <td></td>\n",
       "      <td>2.25</td>\n",
       "      <td>1994-12-14</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>101m_A</td>\n",
       "      <td>101m</td>\n",
       "      <td>A</td>\n",
       "      <td>154</td>\n",
       "      <td>protein</td>\n",
       "      <td>MYOGLOBIN</td>\n",
       "      <td>MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[HEM, NBN, SO4]</td>\n",
       "      <td>Physeter catodon</td>\n",
       "      <td>2.07</td>\n",
       "      <td>1997-12-13</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780157</th>\n",
       "      <td>9xia_A</td>\n",
       "      <td>9xia</td>\n",
       "      <td>A</td>\n",
       "      <td>388</td>\n",
       "      <td>protein</td>\n",
       "      <td>XYLOSE ISOMERASE</td>\n",
       "      <td>MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[DFR, MN]</td>\n",
       "      <td>Streptomyces rubiginosus</td>\n",
       "      <td>1.90</td>\n",
       "      <td>1990-10-11</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780158</th>\n",
       "      <td>9xim_A</td>\n",
       "      <td>9xim</td>\n",
       "      <td>A</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780159</th>\n",
       "      <td>9xim_B</td>\n",
       "      <td>9xim</td>\n",
       "      <td>B</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780160</th>\n",
       "      <td>9xim_C</td>\n",
       "      <td>9xim</td>\n",
       "      <td>C</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780161</th>\n",
       "      <td>9xim_D</td>\n",
       "      <td>9xim</td>\n",
       "      <td>D</td>\n",
       "      <td>393</td>\n",
       "      <td>protein</td>\n",
       "      <td>D-XYLOSE ISOMERASE</td>\n",
       "      <td>SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>4</td>\n",
       "      <td>[MN, XLS]</td>\n",
       "      <td>Actinoplanes missouriensis</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1992-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>563693 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   pdb chain  length molecule_type  \\\n",
       "0       100d_A  100d     A      10            na   \n",
       "1       100d_B  100d     B      10            na   \n",
       "2       101d_A  101d     A      12            na   \n",
       "3       101d_B  101d     B      12            na   \n",
       "4       101m_A  101m     A     154       protein   \n",
       "...        ...   ...   ...     ...           ...   \n",
       "780157  9xia_A  9xia     A     388       protein   \n",
       "780158  9xim_A  9xim     A     393       protein   \n",
       "780159  9xim_B  9xim     B     393       protein   \n",
       "780160  9xim_C  9xim     C     393       protein   \n",
       "780161  9xim_D  9xim     D     393       protein   \n",
       "\n",
       "                                                     name  \\\n",
       "0       DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...   \n",
       "1       DNA/RNA (5'-R(*CP*)-D(*CP*GP*GP*CP*GP*CP*CP*GP...   \n",
       "2       DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...   \n",
       "3       DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*(CBR)P*GP*C...   \n",
       "4                                               MYOGLOBIN   \n",
       "...                                                   ...   \n",
       "780157                                   XYLOSE ISOMERASE   \n",
       "780158                                 D-XYLOSE ISOMERASE   \n",
       "780159                                 D-XYLOSE ISOMERASE   \n",
       "780160                                 D-XYLOSE ISOMERASE   \n",
       "780161                                 D-XYLOSE ISOMERASE   \n",
       "\n",
       "                                                 sequence split  n_chains  \\\n",
       "0                                              CCGGCGCCGG   N/A         2   \n",
       "1                                              CCGGCGCCGG   N/A         2   \n",
       "2                                            CGCGAATTCGCG   N/A         2   \n",
       "3                                            CGCGAATTCGCG   N/A         2   \n",
       "4       MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDR...   N/A         1   \n",
       "...                                                   ...   ...       ...   \n",
       "780157  MNYQPTPEDRFTFGLWTVGWQGRDPFGDATRRALDPVESVQRLAEL...   N/A         1   \n",
       "780158  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780159  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780160  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "780161  SVQATREDKFSFGLWTVGWQARDAFGDATRTALDPVEAVHKLAEIG...   N/A         4   \n",
       "\n",
       "                ligands                      source  resolution  \\\n",
       "0                 [SPM]                                    1.90   \n",
       "1                 [SPM]                                    1.90   \n",
       "2         [CBR, MG, NT]                                    2.25   \n",
       "3         [CBR, MG, NT]                                    2.25   \n",
       "4       [HEM, NBN, SO4]            Physeter catodon        2.07   \n",
       "...                 ...                         ...         ...   \n",
       "780157        [DFR, MN]    Streptomyces rubiginosus        1.90   \n",
       "780158        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780159        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780160        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "780161        [MN, XLS]  Actinoplanes missouriensis        2.40   \n",
       "\n",
       "       deposition_date experiment_type  pdb_file_available  \n",
       "0           1994-12-05     diffraction                True  \n",
       "1           1994-12-05     diffraction                True  \n",
       "2           1994-12-14     diffraction                True  \n",
       "3           1994-12-14     diffraction                True  \n",
       "4           1997-12-13     diffraction                True  \n",
       "...                ...             ...                 ...  \n",
       "780157      1990-10-11     diffraction                True  \n",
       "780158      1992-04-03     diffraction                True  \n",
       "780159      1992-04-03     diffraction                True  \n",
       "780160      1992-04-03     diffraction                True  \n",
       "780161      1992-04-03     diffraction                True  \n",
       "\n",
       "[563693 rows x 15 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Split names; they are read back below as keys of `pdb_manager.df_splits`.\n",
    "splits = [\"train\", \"val\", \"test\"]\n",
    "\n",
    "pdb_manager = PDBManager(\n",
    "    root_dir=\".\",\n",
    "    splits=splits,\n",
    "    split_ratios=[0.8, 0.1, 0.1],\n",
    "    # Three dates, one per entry in `splits` (presumably each split's cut-off date).\n",
    "    split_time_frames=[\n",
    "        np.datetime64(cutoff)\n",
    "        for cutoff in (\"2020-01-01\", \"2021-01-01\", \"2023-03-01\")\n",
    "    ],\n",
    ")\n",
    "\n",
    "# Split the full metadata table by deposition date, then show the training split.\n",
    "pdb_manager.split_by_deposition_date(df=pdb_manager.df, update=True)\n",
    "pdb_manager.df_splits[\"train\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>408858</th>\n",
       "      <td>5qub_A</td>\n",
       "      <td>5qub</td>\n",
       "      <td>A</td>\n",
       "      <td>229</td>\n",
       "      <td>protein</td>\n",
       "      <td>RadA</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[D48, PO4]</td>\n",
       "      <td>Pyrococcus furiosus</td>\n",
       "      <td>1.35</td>\n",
       "      <td>2020-01-27</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>408859</th>\n",
       "      <td>5quc_A</td>\n",
       "      <td>5quc</td>\n",
       "      <td>A</td>\n",
       "      <td>229</td>\n",
       "      <td>protein</td>\n",
       "      <td>RadA</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[D48, PO4]</td>\n",
       "      <td>Pyrococcus furiosus</td>\n",
       "      <td>1.43</td>\n",
       "      <td>2020-01-27</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>408860</th>\n",
       "      <td>5qud_A</td>\n",
       "      <td>5qud</td>\n",
       "      <td>A</td>\n",
       "      <td>229</td>\n",
       "      <td>protein</td>\n",
       "      <td>RadA</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[D48, PO4]</td>\n",
       "      <td>Pyrococcus furiosus</td>\n",
       "      <td>1.30</td>\n",
       "      <td>2020-01-27</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>408861</th>\n",
       "      <td>5que_A</td>\n",
       "      <td>5que</td>\n",
       "      <td>A</td>\n",
       "      <td>229</td>\n",
       "      <td>protein</td>\n",
       "      <td>RadA</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[D48, PO4]</td>\n",
       "      <td>Pyrococcus furiosus</td>\n",
       "      <td>1.30</td>\n",
       "      <td>2020-01-27</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>408862</th>\n",
       "      <td>5quf_A</td>\n",
       "      <td>5quf</td>\n",
       "      <td>A</td>\n",
       "      <td>229</td>\n",
       "      <td>protein</td>\n",
       "      <td>RadA</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[D48, PO4]</td>\n",
       "      <td>Pyrococcus furiosus</td>\n",
       "      <td>1.35</td>\n",
       "      <td>2020-01-27</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>656479</th>\n",
       "      <td>7l8t_B</td>\n",
       "      <td>7l8t</td>\n",
       "      <td>B</td>\n",
       "      <td>153</td>\n",
       "      <td>protein</td>\n",
       "      <td>BG505 SOSIP.v5.2 N241/N289 - gp41</td>\n",
       "      <td>AVGIGAVFLGFLGAAGSTMGAASMTLTVQARNLLSGIVQQQSNLLR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>8</td>\n",
       "      <td>[BMA, MAN, NAG]</td>\n",
       "      <td>Human immunodeficiency virus 1; Macaca mulatta</td>\n",
       "      <td>3.70</td>\n",
       "      <td>2020-12-31</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>656480</th>\n",
       "      <td>7l8t_D</td>\n",
       "      <td>7l8t</td>\n",
       "      <td>D</td>\n",
       "      <td>153</td>\n",
       "      <td>protein</td>\n",
       "      <td>BG505 SOSIP.v5.2 N241/N289 - gp41</td>\n",
       "      <td>AVGIGAVFLGFLGAAGSTMGAASMTLTVQARNLLSGIVQQQSNLLR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>8</td>\n",
       "      <td>[BMA, MAN, NAG]</td>\n",
       "      <td>Human immunodeficiency virus 1; Macaca mulatta</td>\n",
       "      <td>3.70</td>\n",
       "      <td>2020-12-31</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>656481</th>\n",
       "      <td>7l8t_F</td>\n",
       "      <td>7l8t</td>\n",
       "      <td>F</td>\n",
       "      <td>153</td>\n",
       "      <td>protein</td>\n",
       "      <td>BG505 SOSIP.v5.2 N241/N289 - gp41</td>\n",
       "      <td>AVGIGAVFLGFLGAAGSTMGAASMTLTVQARNLLSGIVQQQSNLLR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>8</td>\n",
       "      <td>[BMA, MAN, NAG]</td>\n",
       "      <td>Human immunodeficiency virus 1; Macaca mulatta</td>\n",
       "      <td>3.70</td>\n",
       "      <td>2020-12-31</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>656482</th>\n",
       "      <td>7l8t_H</td>\n",
       "      <td>7l8t</td>\n",
       "      <td>H</td>\n",
       "      <td>116</td>\n",
       "      <td>protein</td>\n",
       "      <td>Rh.33311 pAbC-1 - Heavy Chain</td>\n",
       "      <td>XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>8</td>\n",
       "      <td>[BMA, MAN, NAG]</td>\n",
       "      <td>Human immunodeficiency virus 1; Macaca mulatta</td>\n",
       "      <td>3.70</td>\n",
       "      <td>2020-12-31</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>656483</th>\n",
       "      <td>7l8t_L</td>\n",
       "      <td>7l8t</td>\n",
       "      <td>L</td>\n",
       "      <td>102</td>\n",
       "      <td>protein</td>\n",
       "      <td>Rh.33311 pAbC-1 - Light Chain</td>\n",
       "      <td>XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>8</td>\n",
       "      <td>[BMA, MAN, NAG]</td>\n",
       "      <td>Human immunodeficiency virus 1; Macaca mulatta</td>\n",
       "      <td>3.70</td>\n",
       "      <td>2020-12-31</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>78973 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   pdb chain  length molecule_type  \\\n",
       "408858  5qub_A  5qub     A     229       protein   \n",
       "408859  5quc_A  5quc     A     229       protein   \n",
       "408860  5qud_A  5qud     A     229       protein   \n",
       "408861  5que_A  5que     A     229       protein   \n",
       "408862  5quf_A  5quf     A     229       protein   \n",
       "...        ...   ...   ...     ...           ...   \n",
       "656479  7l8t_B  7l8t     B     153       protein   \n",
       "656480  7l8t_D  7l8t     D     153       protein   \n",
       "656481  7l8t_F  7l8t     F     153       protein   \n",
       "656482  7l8t_H  7l8t     H     116       protein   \n",
       "656483  7l8t_L  7l8t     L     102       protein   \n",
       "\n",
       "                                     name  \\\n",
       "408858                               RadA   \n",
       "408859                               RadA   \n",
       "408860                               RadA   \n",
       "408861                               RadA   \n",
       "408862                               RadA   \n",
       "...                                   ...   \n",
       "656479  BG505 SOSIP.v5.2 N241/N289 - gp41   \n",
       "656480  BG505 SOSIP.v5.2 N241/N289 - gp41   \n",
       "656481  BG505 SOSIP.v5.2 N241/N289 - gp41   \n",
       "656482      Rh.33311 pAbC-1 - Heavy Chain   \n",
       "656483      Rh.33311 pAbC-1 - Light Chain   \n",
       "\n",
       "                                                 sequence split  n_chains  \\\n",
       "408858  MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   N/A         1   \n",
       "408859  MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   N/A         1   \n",
       "408860  MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   N/A         1   \n",
       "408861  MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   N/A         1   \n",
       "408862  MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   N/A         1   \n",
       "...                                                   ...   ...       ...   \n",
       "656479  AVGIGAVFLGFLGAAGSTMGAASMTLTVQARNLLSGIVQQQSNLLR...   N/A         8   \n",
       "656480  AVGIGAVFLGFLGAAGSTMGAASMTLTVQARNLLSGIVQQQSNLLR...   N/A         8   \n",
       "656481  AVGIGAVFLGFLGAAGSTMGAASMTLTVQARNLLSGIVQQQSNLLR...   N/A         8   \n",
       "656482  XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX...   N/A         8   \n",
       "656483  XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX...   N/A         8   \n",
       "\n",
       "                ligands                                          source  \\\n",
       "408858       [D48, PO4]                             Pyrococcus furiosus   \n",
       "408859       [D48, PO4]                             Pyrococcus furiosus   \n",
       "408860       [D48, PO4]                             Pyrococcus furiosus   \n",
       "408861       [D48, PO4]                             Pyrococcus furiosus   \n",
       "408862       [D48, PO4]                             Pyrococcus furiosus   \n",
       "...                 ...                                             ...   \n",
       "656479  [BMA, MAN, NAG]  Human immunodeficiency virus 1; Macaca mulatta   \n",
       "656480  [BMA, MAN, NAG]  Human immunodeficiency virus 1; Macaca mulatta   \n",
       "656481  [BMA, MAN, NAG]  Human immunodeficiency virus 1; Macaca mulatta   \n",
       "656482  [BMA, MAN, NAG]  Human immunodeficiency virus 1; Macaca mulatta   \n",
       "656483  [BMA, MAN, NAG]  Human immunodeficiency virus 1; Macaca mulatta   \n",
       "\n",
       "        resolution deposition_date experiment_type  pdb_file_available  \n",
       "408858        1.35      2020-01-27     diffraction                True  \n",
       "408859        1.43      2020-01-27     diffraction                True  \n",
       "408860        1.30      2020-01-27     diffraction                True  \n",
       "408861        1.30      2020-01-27     diffraction                True  \n",
       "408862        1.35      2020-01-27     diffraction                True  \n",
       "...            ...             ...             ...                 ...  \n",
       "656479        3.70      2020-12-31              EM                True  \n",
       "656480        3.70      2020-12-31              EM                True  \n",
       "656481        3.70      2020-12-31              EM                True  \n",
       "656482        3.70      2020-12-31              EM                True  \n",
       "656483        3.70      2020-12-31              EM                True  \n",
       "\n",
       "[78973 rows x 15 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the chains held under the \"val\" key after the deposition-date split above.\n",
    "pdb_manager.df_splits[\"val\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>340899</th>\n",
       "      <td>5bkf_A</td>\n",
       "      <td>5bkf</td>\n",
       "      <td>A</td>\n",
       "      <td>364</td>\n",
       "      <td>protein</td>\n",
       "      <td>Glycine receptor subunit alpha-2</td>\n",
       "      <td>KDHDSRSGKQPSQTLSPSDFLDKLMGRTSGYDARIRPNFKGPPVNV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>5</td>\n",
       "      <td>[NAG]</td>\n",
       "      <td>Aequorea victoria; Homo sapiens</td>\n",
       "      <td>3.60</td>\n",
       "      <td>2021-03-19</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340900</th>\n",
       "      <td>5bkf_B</td>\n",
       "      <td>5bkf</td>\n",
       "      <td>B</td>\n",
       "      <td>364</td>\n",
       "      <td>protein</td>\n",
       "      <td>Glycine receptor subunit alpha-2</td>\n",
       "      <td>KDHDSRSGKQPSQTLSPSDFLDKLMGRTSGYDARIRPNFKGPPVNV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>5</td>\n",
       "      <td>[NAG]</td>\n",
       "      <td>Aequorea victoria; Homo sapiens</td>\n",
       "      <td>3.60</td>\n",
       "      <td>2021-03-19</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340901</th>\n",
       "      <td>5bkf_C</td>\n",
       "      <td>5bkf</td>\n",
       "      <td>C</td>\n",
       "      <td>364</td>\n",
       "      <td>protein</td>\n",
       "      <td>Glycine receptor subunit alpha-2</td>\n",
       "      <td>KDHDSRSGKQPSQTLSPSDFLDKLMGRTSGYDARIRPNFKGPPVNV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>5</td>\n",
       "      <td>[NAG]</td>\n",
       "      <td>Aequorea victoria; Homo sapiens</td>\n",
       "      <td>3.60</td>\n",
       "      <td>2021-03-19</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340902</th>\n",
       "      <td>5bkf_D</td>\n",
       "      <td>5bkf</td>\n",
       "      <td>D</td>\n",
       "      <td>364</td>\n",
       "      <td>protein</td>\n",
       "      <td>Glycine receptor subunit alpha-2</td>\n",
       "      <td>KDHDSRSGKQPSQTLSPSDFLDKLMGRTSGYDARIRPNFKGPPVNV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>5</td>\n",
       "      <td>[NAG]</td>\n",
       "      <td>Aequorea victoria; Homo sapiens</td>\n",
       "      <td>3.60</td>\n",
       "      <td>2021-03-19</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340903</th>\n",
       "      <td>5bkf_E</td>\n",
       "      <td>5bkf</td>\n",
       "      <td>E</td>\n",
       "      <td>702</td>\n",
       "      <td>protein</td>\n",
       "      <td>Glycine receptor subunit beta,Green fluorescen...</td>\n",
       "      <td>GVAMPGAEDDVVAALEVLFQGPKSSKKGKGKKKQYLCPSQQSAEDL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>5</td>\n",
       "      <td>[NAG]</td>\n",
       "      <td>Aequorea victoria; Homo sapiens</td>\n",
       "      <td>3.60</td>\n",
       "      <td>2021-03-19</td>\n",
       "      <td>EM</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779966</th>\n",
       "      <td>8ika_Au</td>\n",
       "      <td>8ika</td>\n",
       "      <td>Au</td>\n",
       "      <td>265</td>\n",
       "      <td>protein</td>\n",
       "      <td>Type 1 encapsulin shell protein</td>\n",
       "      <td>MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>60</td>\n",
       "      <td>[]</td>\n",
       "      <td>Mycobacterium tuberculosis (strain ATCC 25618 ...</td>\n",
       "      <td>2.75</td>\n",
       "      <td>2023-02-28</td>\n",
       "      <td>EM</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779967</th>\n",
       "      <td>8ika_Av</td>\n",
       "      <td>8ika</td>\n",
       "      <td>Av</td>\n",
       "      <td>265</td>\n",
       "      <td>protein</td>\n",
       "      <td>Type 1 encapsulin shell protein</td>\n",
       "      <td>MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>60</td>\n",
       "      <td>[]</td>\n",
       "      <td>Mycobacterium tuberculosis (strain ATCC 25618 ...</td>\n",
       "      <td>2.75</td>\n",
       "      <td>2023-02-28</td>\n",
       "      <td>EM</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779968</th>\n",
       "      <td>8ika_Aw</td>\n",
       "      <td>8ika</td>\n",
       "      <td>Aw</td>\n",
       "      <td>265</td>\n",
       "      <td>protein</td>\n",
       "      <td>Type 1 encapsulin shell protein</td>\n",
       "      <td>MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>60</td>\n",
       "      <td>[]</td>\n",
       "      <td>Mycobacterium tuberculosis (strain ATCC 25618 ...</td>\n",
       "      <td>2.75</td>\n",
       "      <td>2023-02-28</td>\n",
       "      <td>EM</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779969</th>\n",
       "      <td>8ika_Ax</td>\n",
       "      <td>8ika</td>\n",
       "      <td>Ax</td>\n",
       "      <td>265</td>\n",
       "      <td>protein</td>\n",
       "      <td>Type 1 encapsulin shell protein</td>\n",
       "      <td>MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>60</td>\n",
       "      <td>[]</td>\n",
       "      <td>Mycobacterium tuberculosis (strain ATCC 25618 ...</td>\n",
       "      <td>2.75</td>\n",
       "      <td>2023-02-28</td>\n",
       "      <td>EM</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779970</th>\n",
       "      <td>8ika_Ay</td>\n",
       "      <td>8ika</td>\n",
       "      <td>Ay</td>\n",
       "      <td>265</td>\n",
       "      <td>protein</td>\n",
       "      <td>Type 1 encapsulin shell protein</td>\n",
       "      <td>MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>60</td>\n",
       "      <td>[]</td>\n",
       "      <td>Mycobacterium tuberculosis (strain ATCC 25618 ...</td>\n",
       "      <td>2.75</td>\n",
       "      <td>2023-02-28</td>\n",
       "      <td>EM</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>137323 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             id   pdb chain  length molecule_type  \\\n",
       "340899   5bkf_A  5bkf     A     364       protein   \n",
       "340900   5bkf_B  5bkf     B     364       protein   \n",
       "340901   5bkf_C  5bkf     C     364       protein   \n",
       "340902   5bkf_D  5bkf     D     364       protein   \n",
       "340903   5bkf_E  5bkf     E     702       protein   \n",
       "...         ...   ...   ...     ...           ...   \n",
       "779966  8ika_Au  8ika    Au     265       protein   \n",
       "779967  8ika_Av  8ika    Av     265       protein   \n",
       "779968  8ika_Aw  8ika    Aw     265       protein   \n",
       "779969  8ika_Ax  8ika    Ax     265       protein   \n",
       "779970  8ika_Ay  8ika    Ay     265       protein   \n",
       "\n",
       "                                                     name  \\\n",
       "340899                   Glycine receptor subunit alpha-2   \n",
       "340900                   Glycine receptor subunit alpha-2   \n",
       "340901                   Glycine receptor subunit alpha-2   \n",
       "340902                   Glycine receptor subunit alpha-2   \n",
       "340903  Glycine receptor subunit beta,Green fluorescen...   \n",
       "...                                                   ...   \n",
       "779966                    Type 1 encapsulin shell protein   \n",
       "779967                    Type 1 encapsulin shell protein   \n",
       "779968                    Type 1 encapsulin shell protein   \n",
       "779969                    Type 1 encapsulin shell protein   \n",
       "779970                    Type 1 encapsulin shell protein   \n",
       "\n",
       "                                                 sequence split  n_chains  \\\n",
       "340899  KDHDSRSGKQPSQTLSPSDFLDKLMGRTSGYDARIRPNFKGPPVNV...   N/A         5   \n",
       "340900  KDHDSRSGKQPSQTLSPSDFLDKLMGRTSGYDARIRPNFKGPPVNV...   N/A         5   \n",
       "340901  KDHDSRSGKQPSQTLSPSDFLDKLMGRTSGYDARIRPNFKGPPVNV...   N/A         5   \n",
       "340902  KDHDSRSGKQPSQTLSPSDFLDKLMGRTSGYDARIRPNFKGPPVNV...   N/A         5   \n",
       "340903  GVAMPGAEDDVVAALEVLFQGPKSSKKGKGKKKQYLCPSQQSAEDL...   N/A         5   \n",
       "...                                                   ...   ...       ...   \n",
       "779966  MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...   N/A        60   \n",
       "779967  MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...   N/A        60   \n",
       "779968  MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...   N/A        60   \n",
       "779969  MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...   N/A        60   \n",
       "779970  MNNLYRDLAPVTEAAWAEIELEAARTFKRHIAGRRVVDVSDPGGPV...   N/A        60   \n",
       "\n",
       "       ligands                                             source  resolution  \\\n",
       "340899   [NAG]                    Aequorea victoria; Homo sapiens        3.60   \n",
       "340900   [NAG]                    Aequorea victoria; Homo sapiens        3.60   \n",
       "340901   [NAG]                    Aequorea victoria; Homo sapiens        3.60   \n",
       "340902   [NAG]                    Aequorea victoria; Homo sapiens        3.60   \n",
       "340903   [NAG]                    Aequorea victoria; Homo sapiens        3.60   \n",
       "...        ...                                                ...         ...   \n",
       "779966      []  Mycobacterium tuberculosis (strain ATCC 25618 ...        2.75   \n",
       "779967      []  Mycobacterium tuberculosis (strain ATCC 25618 ...        2.75   \n",
       "779968      []  Mycobacterium tuberculosis (strain ATCC 25618 ...        2.75   \n",
       "779969      []  Mycobacterium tuberculosis (strain ATCC 25618 ...        2.75   \n",
       "779970      []  Mycobacterium tuberculosis (strain ATCC 25618 ...        2.75   \n",
       "\n",
       "       deposition_date experiment_type  pdb_file_available  \n",
       "340899      2021-03-19              EM                True  \n",
       "340900      2021-03-19              EM                True  \n",
       "340901      2021-03-19              EM                True  \n",
       "340902      2021-03-19              EM                True  \n",
       "340903      2021-03-19              EM                True  \n",
       "...                ...             ...                 ...  \n",
       "779966      2023-02-28              EM               False  \n",
       "779967      2023-02-28              EM               False  \n",
       "779968      2023-02-28              EM               False  \n",
       "779969      2023-02-28              EM               False  \n",
       "779970      2023-02-28              EM               False  \n",
       "\n",
       "[137323 rows x 15 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the chains held under the \"test\" key after the deposition-date split above.\n",
    "pdb_manager.df_splits[\"test\"]"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can also perform additional structural filters:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of chains:  5547\n"
     ]
    }
   ],
   "source": [
    "# Apply the filters one after another; the size of `pdb_manager.df` is reported at the end.\n",
    "pdb_manager.molecule_type(type=\"protein\", update=True)\n",
    "pdb_manager.experiment_type(type=\"diffraction\", update=True)\n",
    "pdb_manager.resolution_better_than_or_equal_to(2.0, update=True)\n",
    "pdb_manager.length_longer_than(40, update=True)\n",
    "pdb_manager.length_shorter_than(401, update=True)\n",
    "\n",
    "# Must contain an SO4 ligand\n",
    "pdb_manager.has_ligands([\"SO4\"], update=True)\n",
    "\n",
    "# Get only monomers\n",
    "pdb_manager.oligomeric(1, update=True)\n",
    "\n",
    "print(f\"Number of chains:  {len(pdb_manager.df)}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We also want to restrict ourselves to proteins that contain only standard residues and are available to download in PDB format."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of chains:  5461\n"
     ]
    }
   ],
   "source": [
    "# Drop sequences with non-standard residues, then entries without a downloadable PDB file.\n",
    "pdb_manager.remove_non_standard_alphabet_sequences(update=True)\n",
    "pdb_manager.remove_unavailable_pdbs(update=True)\n",
    "\n",
    "print(f\"Number of chains:  {len(pdb_manager.df)}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sequence Similarity Splits\n",
    "\n",
    "We can cluster sequences by sequence similarity using [MMseqs2](https://github.com/soedinglab/MMseqs2):\n",
    "\n",
    "(To install MMseqs2: `conda install -c conda-forge -c bioconda mmseqs2`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/30/23 20:28:24] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Overwriting. Removing old cluster file                                <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1259\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1259</span></a>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         pdb_cluster_rep_seq_id_0.3_c_0.<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8.</span>fasta                                <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/30/23 20:28:24]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Overwriting. Removing old cluster file                                \u001b]8;id=531191;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=128096;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1259\u001b\\\u001b[2m1259\u001b[0m\u001b]8;;\u001b\\\n",
       "\u001b[2;36m                    \u001b[0m         pdb_cluster_rep_seq_id_0.3_c_0.\u001b[1;36m8.\u001b[0mfasta                                \u001b[2m                \u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Creating clusters<span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                                  <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1265\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1265</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Creating clusters\u001b[33m...\u001b[0m                                                  \u001b]8;id=3443;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=899284;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1265\u001b\\\u001b[2m1265\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Writing current selection <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5461</span> chains<span style=\"font-weight: bold\">)</span> to FASTA<span style=\"color: #808000; text-decoration-color: #808000\">...</span>                   <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1267\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1267</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Writing current selection \u001b[1m(\u001b[0m\u001b[1;36m5461\u001b[0m chains\u001b[1m)\u001b[0m to FASTA\u001b[33m...\u001b[0m                   \u001b]8;id=96364;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=579490;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1267\u001b\\\u001b[2m1267\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Clustering with: mmseqs easy-cluster pdb.fasta pdb_cluster tmp        <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1278\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1278</span></a>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         --min-seq-id <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.3</span> -c <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.8</span> --cov-mode <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>                                  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Clustering with: mmseqs easy-cluster pdb.fasta pdb_cluster tmp        \u001b]8;id=605272;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=742532;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1278\u001b\\\u001b[2m1278\u001b[0m\u001b]8;;\u001b\\\n",
       "\u001b[2;36m                    \u001b[0m         --min-seq-id \u001b[1;36m0.3\u001b[0m -c \u001b[1;36m0.8\u001b[0m --cov-mode \u001b[1;36m1\u001b[0m                                  \u001b[2m                \u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "easy-cluster pdb.fasta pdb_cluster tmp --min-seq-id 0.3 -c 0.8 --cov-mode 1 \n",
      "\n",
      "MMseqs Version:                     \t14.7e284\n",
      "Substitution matrix                 \taa:blosum62.out,nucl:nucleotide.out\n",
      "Seed substitution matrix            \taa:VTML80.out,nucl:nucleotide.out\n",
      "Sensitivity                         \t4\n",
      "k-mer length                        \t0\n",
      "k-score                             \tseq:2147483647,prof:2147483647\n",
      "Alphabet size                       \taa:21,nucl:5\n",
      "Max sequence length                 \t65535\n",
      "Max results per query               \t20\n",
      "Split database                      \t0\n",
      "Split mode                          \t2\n",
      "Split memory limit                  \t0\n",
      "Coverage threshold                  \t0.8\n",
      "Coverage mode                       \t1\n",
      "Compositional bias                  \t1\n",
      "Compositional bias                  \t1\n",
      "Diagonal scoring                    \ttrue\n",
      "Exact k-mer matching                \t0\n",
      "Mask residues                       \t1\n",
      "Mask residues probability           \t0.9\n",
      "Mask lower case residues            \t0\n",
      "Minimum diagonal score              \t15\n",
      "Selected taxa                       \t\n",
      "Include identical seq. id.          \tfalse\n",
      "Spaced k-mers                       \t1\n",
      "Preload mode                        \t0\n",
      "Pseudo count a                      \tsubstitution:1.100,context:1.400\n",
      "Pseudo count b                      \tsubstitution:4.100,context:5.800\n",
      "Spaced k-mer pattern                \t\n",
      "Local temporary path                \t\n",
      "Threads                             \t64\n",
      "Compressed                          \t0\n",
      "Verbosity                           \t3\n",
      "Add backtrace                       \tfalse\n",
      "Alignment mode                      \t3\n",
      "Alignment mode                      \t0\n",
      "Allow wrapped scoring               \tfalse\n",
      "E-value threshold                   \t0.001\n",
      "Seq. id. threshold                  \t0.3\n",
      "Min alignment length                \t0\n",
      "Seq. id. mode                       \t0\n",
      "Alternative alignments              \t0\n",
      "Max reject                          \t2147483647\n",
      "Max accept                          \t2147483647\n",
      "Score bias                          \t0\n",
      "Realign hits                        \tfalse\n",
      "Realign score bias                  \t-0.2\n",
      "Realign max seqs                    \t2147483647\n",
      "Correlation score weight            \t0\n",
      "Gap open cost                       \taa:11,nucl:5\n",
      "Gap extension cost                  \taa:1,nucl:2\n",
      "Zdrop                               \t40\n",
      "Rescore mode                        \t0\n",
      "Remove hits by seq. id. and coverage\tfalse\n",
      "Sort results                        \t0\n",
      "Cluster mode                        \t0\n",
      "Max connected component depth       \t1000\n",
      "Similarity type                     \t2\n",
      "Single step clustering              \tfalse\n",
      "Cascaded clustering steps           \t3\n",
      "Cluster reassign                    \tfalse\n",
      "Remove temporary files              \ttrue\n",
      "Force restart with latest tmp       \tfalse\n",
      "MPI runner                          \t\n",
      "k-mers per sequence                 \t21\n",
      "Scale k-mers per sequence           \taa:0.000,nucl:0.200\n",
      "Adjust k-mer length                 \tfalse\n",
      "Shift hash                          \t67\n",
      "Include only extendable             \tfalse\n",
      "Skip repeating k-mers               \tfalse\n",
      "Database type                       \t0\n",
      "Shuffle input database              \ttrue\n",
      "Createdb mode                       \t1\n",
      "Write lookup file                   \t0\n",
      "Offset of numeric ids               \t0\n",
      "\n",
      "createdb pdb.fasta tmp/2979816542525769739/input --dbtype 0 --shuffle 1 --createdb-mode 1 --write-lookup 0 --id-offset 0 --compressed 0 -v 3 \n",
      "\n",
      "Shuffle database cannot be combined with --createdb-mode 0\n",
      "We recompute with --shuffle 0\n",
      "Converting sequences\n",
      "[\n",
      "Time for merging to input_h: 0h 0m 0s 0ms\n",
      "Time for merging to input: 0h 0m 0s 0ms\n",
      "Database type: Aminoacid\n",
      "Time for processing: 0h 0m 0s 2ms\n",
      "Create directory tmp/2979816542525769739/clu_tmp\n",
      "cluster tmp/2979816542525769739/input tmp/2979816542525769739/clu tmp/2979816542525769739/clu_tmp --max-seqs 20 -c 0.8 --cov-mode 1 --spaced-kmer-mode 1 --alignment-mode 3 -e 0.001 --min-seq-id 0.3 --remove-tmp-files 1 \n",
      "\n",
      "Set cluster sensitivity to -s 5.000000\n",
      "Set cluster mode GREEDY MEM\n",
      "Set cluster iterations to 3\n",
      "linclust tmp/2979816542525769739/input tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust --cluster-mode 3 --max-iterations 1000 --similarity-type 2 --threads 64 --compressed 0 -v 3 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' -a 0 --alignment-mode 3 --alignment-output-mode 0 --wrapped-scoring 0 -e 0.001 --min-seq-id 0.3 --min-aln-len 0 --seq-id-mode 0 --alt-ali 0 -c 0.8 --cov-mode 1 --max-seq-len 65535 --comp-bias-corr 1 --comp-bias-corr-scale 1 --max-rejected 2147483647 --max-accept 2147483647 --add-self-matches 0 --db-load-mode 0 --pca substitution:1.100,context:1.400 --pcb substitution:4.100,context:5.800 --score-bias 0 --realign 0 --realign-score-bias -0.2 --realign-max-seqs 2147483647 --corr-score-weight 0 --gap-open aa:11,nucl:5 --gap-extend aa:1,nucl:2 --zdrop 40 --alph-size aa:13,nucl:5 --kmer-per-seq 21 --spaced-kmer-mode 1 --kmer-per-seq-scale aa:0.000,nucl:0.200 --adjust-kmer-len 0 --mask 0 --mask-prob 0.9 --mask-lower-case 0 -k 0 --hash-shift 67 --split-memory-limit 0 --include-only-extendable 0 --ignore-multi-kmer 0 --rescore-mode 0 --filter-hits 0 --sort-results 0 --remove-tmp-files 1 --force-reuse 0 \n",
      "\n",
      "kmermatcher tmp/2979816542525769739/input tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' --alph-size aa:13,nucl:5 --min-seq-id 0.3 --kmer-per-seq 21 --spaced-kmer-mode 1 --kmer-per-seq-scale aa:0.000,nucl:0.200 --adjust-kmer-len 0 --mask 0 --mask-prob 0.9 --mask-lower-case 0 --cov-mode 1 -k 0 -c 0.8 --max-seq-len 65535 --hash-shift 67 --split-memory-limit 0 --include-only-extendable 0 --ignore-multi-kmer 0 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "kmermatcher tmp/2979816542525769739/input tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' --alph-size aa:13,nucl:5 --min-seq-id 0.3 --kmer-per-seq 21 --spaced-kmer-mode 1 --kmer-per-seq-scale aa:0.000,nucl:0.200 --adjust-kmer-len 0 --mask 0 --mask-prob 0.9 --mask-lower-case 0 --cov-mode 1 -k 0 -c 0.8 --max-seq-len 65535 --hash-shift 67 --split-memory-limit 0 --include-only-extendable 0 --ignore-multi-kmer 0 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Database size: 5461 type: Aminoacid\n",
      "Reduced amino acid alphabet: (A S T) (C) (D B N) (E Q Z) (F Y) (G) (H) (I V) (K R) (L J M) (P) (W) (X) \n",
      "\n",
      "Generate k-mers list for 1 split\n",
      "[=================================================================] 5.46K 0s 7ms\n",
      "Sort kmer 0h 0m 0s 2ms\n",
      "Sort by rep. sequence 0h 0m 0s 2ms\n",
      "Time for fill: 0h 0m 0s 0ms\n",
      "Time for merging to pref: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 50ms\n",
      "rescorediagonal tmp/2979816542525769739/input tmp/2979816542525769739/input tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_rescore1 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' --rescore-mode 0 --wrapped-scoring 0 --filter-hits 0 -e 0.001 -c 0.8 -a 0 --cov-mode 1 --min-seq-id 0.5 --min-aln-len 0 --seq-id-mode 0 --add-self-matches 0 --sort-results 0 --db-load-mode 0 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "[=================================================================] 5.46K 0s 3ms\n",
      "Time for merging to pref_rescore1: 0h 0m 0s 3ms\n",
      "Time for processing: 0h 0m 0s 126ms\n",
      "clust tmp/2979816542525769739/input tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_rescore1 tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pre_clust --cluster-mode 3 --max-iterations 1000 --similarity-type 2 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Clustering mode: Greedy Low Mem\n",
      "Total time: 0h 0m 0s 10ms\n",
      "\n",
      "Size of the sequence database: 5461\n",
      "Size of the alignment database: 5461\n",
      "Number of clusters: 1679\n",
      "\n",
      "Writing results 0h 0m 0s 0ms\n",
      "Time for merging to pre_clust: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 36ms\n",
      "createsubdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/order_redundancy tmp/2979816542525769739/input tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/input_step_redundancy -v 3 --subdb-mode 1 \n",
      "\n",
      "Time for merging to input_step_redundancy: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 7ms\n",
      "createsubdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/order_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_filter1 -v 3 --subdb-mode 1 \n",
      "\n",
      "Time for merging to pref_filter1: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 5ms\n",
      "filterdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_filter1 tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_filter2 --filter-file tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/order_redundancy --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Filtering using file(s)\n",
      "[=================================================================] 1.68K 0s 3ms\n",
      "Time for merging to pref_filter2: 0h 0m 0s 2ms\n",
      "Time for processing: 0h 0m 0s 146ms\n",
      "rescorediagonal tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_filter2 tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_rescore2 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' --rescore-mode 1 --wrapped-scoring 0 --filter-hits 1 -e 0.001 -c 0.8 -a 0 --cov-mode 1 --min-seq-id 0.3 --min-aln-len 0 --seq-id-mode 0 --add-self-matches 0 --sort-results 0 --db-load-mode 0 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "[=================================================================] 1.68K 0s 27ms\n",
      "Time for merging to pref_rescore2: 0h 0m 0s 3ms\n",
      "Time for processing: 0h 0m 0s 203ms\n",
      "align tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_rescore2 tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/aln --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' -a 0 --alignment-mode 3 --alignment-output-mode 0 --wrapped-scoring 0 -e 0.001 --min-seq-id 0.3 --min-aln-len 0 --seq-id-mode 0 --alt-ali 0 -c 0.8 --cov-mode 1 --max-seq-len 65535 --comp-bias-corr 1 --comp-bias-corr-scale 1 --max-rejected 2147483647 --max-accept 2147483647 --add-self-matches 0 --db-load-mode 0 --pca substitution:1.100,context:1.400 --pcb substitution:4.100,context:5.800 --score-bias 0 --realign 0 --realign-score-bias -0.2 --realign-max-seqs 2147483647 --corr-score-weight 0 --gap-open aa:11,nucl:5 --gap-extend aa:1,nucl:2 --zdrop 40 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Compute score, coverage and sequence identity\n",
      "Query database size: 1679 type: Aminoacid\n",
      "Target database size: 1679 type: Aminoacid\n",
      "Calculation of alignments\n",
      "[=================================================================] 1.68K 0s 20ms\n",
      "Time for merging to aln: 0h 0m 0s 3ms\n",
      "1788 alignments calculated\n",
      "1743 sequence pairs passed the thresholds (0.974832 of overall calculated)\n",
      "1.038118 hits per query sequence\n",
      "Time for processing: 0h 0m 0s 200ms\n",
      "clust tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/aln tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/clust --cluster-mode 3 --max-iterations 1000 --similarity-type 2 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Clustering mode: Greedy Low Mem\n",
      "Total time: 0h 0m 0s 23ms\n",
      "\n",
      "Size of the sequence database: 1679\n",
      "Size of the alignment database: 1679\n",
      "Number of clusters: 1619\n",
      "\n",
      "Writing results 0h 0m 0s 0ms\n",
      "Time for merging to clust: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 54ms\n",
      "mergeclusters tmp/2979816542525769739/input tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pre_clust tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/clust --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Clustering step 1\n",
      "[=================================================================] 1.68K 0s 3ms\n",
      "Clustering step 2\n",
      "[=================================================================] 1.62K 0s 38ms\n",
      "Write merged clustering\n",
      "[=================================================================] 5.46K 0s 177ms\n",
      "Time for merging to clu_redundancy: 0h 0m 0s 2ms\n",
      "Time for processing: 0h 0m 0s 206ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_filter1 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_rescore1 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pre_clust -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/input_step_redundancy -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/input_step_redundancy_h -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_filter2 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/pref_rescore2 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/aln -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/linclust/4027551054934845650/clust -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "createsubdb tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_redundancy tmp/2979816542525769739/input tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy -v 3 --subdb-mode 1 \n",
      "\n",
      "Time for merging to input_step_redundancy: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 6ms\n",
      "prefilter tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step0 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' --seed-sub-mat 'aa:VTML80.out,nucl:nucleotide.out' -s 1 -k 0 --k-score seq:2147483647,prof:2147483647 --alph-size aa:21,nucl:5 --max-seq-len 65535 --max-seqs 20 --split 0 --split-mode 2 --split-memory-limit 0 -c 0.8 --cov-mode 1 --comp-bias-corr 0 --comp-bias-corr-scale 1 --diag-score 0 --exact-kmer-matching 0 --mask 1 --mask-prob 0.9 --mask-lower-case 0 --min-ungapped-score 0 --add-self-matches 0 --spaced-kmer-mode 1 --db-load-mode 0 --pca substitution:1.100,context:1.400 --pcb substitution:4.100,context:5.800 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Query database size: 1619 type: Aminoacid\n",
      "Estimated memory consumption: 984M\n",
      "Target database size: 1619 type: Aminoacid\n",
      "Index table k-mer threshold: 154 at k-mer size 6 \n",
      "Index table: counting k-mers\n",
      "[=================================================================] 1.62K 0s 24ms\n",
      "Index table: Masked residues: 5374\n",
      "Index table: fill\n",
      "[=================================================================] 1.62K 0s 12ms\n",
      "Index statistics\n",
      "Entries:          168376\n",
      "DB size:          489 MB\n",
      "Avg k-mer size:   0.002631\n",
      "Top 10 k-mers\n",
      "    SGVRHM\t56\n",
      "    DLTNFQ\t24\n",
      "    SSRNFQ\t12\n",
      "    GLPGMT\t10\n",
      "    GGDCDS\t7\n",
      "    SCGSPL\t5\n",
      "    ENYQAM\t5\n",
      "    RELFHM\t5\n",
      "    GLPGMM\t5\n",
      "    GLPGMS\t5\n",
      "Time for index table init: 0h 0m 0s 411ms\n",
      "Process prefiltering step 1 of 1\n",
      "\n",
      "k-mer similarity threshold: 154\n",
      "Starting prefiltering scores calculation (step 1 of 1)\n",
      "Query db start 1 to 1619\n",
      "Target db start 1 to 1619\n",
      "[=================================================================] 1.62K 0s 6ms\n",
      "\n",
      "2.180235 k-mers per position\n",
      "111 DB matches per sequence\n",
      "0 overflows\n",
      "0 queries produce too many hits (truncated result)\n",
      "2 sequences passed prefiltering per query sequence\n",
      "1 median result list length\n",
      "0 sequences with 0 size result lists\n",
      "Time for merging to pref_step0: 0h 0m 0s 3ms\n",
      "Time for processing: 0h 0m 0s 720ms\n",
      "align tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step0 tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step0 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' -a 0 --alignment-mode 3 --alignment-output-mode 0 --wrapped-scoring 0 -e 0.001 --min-seq-id 0.3 --min-aln-len 0 --seq-id-mode 0 --alt-ali 0 -c 0.8 --cov-mode 1 --max-seq-len 65535 --comp-bias-corr 0 --comp-bias-corr-scale 1 --max-rejected 2147483647 --max-accept 2147483647 --add-self-matches 0 --db-load-mode 0 --pca substitution:1.100,context:1.400 --pcb substitution:4.100,context:5.800 --score-bias 0 --realign 0 --realign-score-bias -0.2 --realign-max-seqs 2147483647 --corr-score-weight 0 --gap-open aa:11,nucl:5 --gap-extend aa:1,nucl:2 --zdrop 40 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Compute score, coverage and sequence identity\n",
      "Query database size: 1619 type: Aminoacid\n",
      "Target database size: 1619 type: Aminoacid\n",
      "Calculation of alignments\n",
      "[=================================================================] 1.62K 0s 36ms\n",
      "Time for merging to aln_step0: 0h 0m 0s 3ms\n",
      "3245 alignments calculated\n",
      "2026 sequence pairs passed the thresholds (0.624345 of overall calculated)\n",
      "1.251390 hits per query sequence\n",
      "Time for processing: 0h 0m 0s 159ms\n",
      "clust tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step0 tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step0 --cluster-mode 3 --max-iterations 1000 --similarity-type 2 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Clustering mode: Greedy Low Mem\n",
      "Total time: 0h 0m 0s 11ms\n",
      "\n",
      "Size of the sequence database: 1619\n",
      "Size of the alignment database: 1619\n",
      "Number of clusters: 1483\n",
      "\n",
      "Writing results 0h 0m 0s 0ms\n",
      "Time for merging to clu_step0: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 31ms\n",
      "createsubdb tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step0 tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1 -v 3 --subdb-mode 1 \n",
      "\n",
      "Time for merging to input_step1: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 2ms\n",
      "prefilter tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step1 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' --seed-sub-mat 'aa:VTML80.out,nucl:nucleotide.out' -s 3 -k 0 --k-score seq:2147483647,prof:2147483647 --alph-size aa:21,nucl:5 --max-seq-len 65535 --max-seqs 20 --split 0 --split-mode 2 --split-memory-limit 0 -c 0.8 --cov-mode 1 --comp-bias-corr 1 --comp-bias-corr-scale 1 --diag-score 1 --exact-kmer-matching 0 --mask 1 --mask-prob 0.9 --mask-lower-case 0 --min-ungapped-score 15 --add-self-matches 0 --spaced-kmer-mode 1 --db-load-mode 0 --pca substitution:1.100,context:1.400 --pcb substitution:4.100,context:5.800 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Query database size: 1483 type: Aminoacid\n",
      "Estimated memory consumption: 984M\n",
      "Target database size: 1483 type: Aminoacid\n",
      "Index table k-mer threshold: 136 at k-mer size 6 \n",
      "Index table: counting k-mers\n",
      "[=================================================================] 1.48K 0s 19ms\n",
      "Index table: Masked residues: 4877\n",
      "Index table: fill\n",
      "[=================================================================] 1.48K 0s 8ms\n",
      "Index statistics\n",
      "Entries:          300450\n",
      "DB size:          490 MB\n",
      "Avg k-mer size:   0.004695\n",
      "Top 10 k-mers\n",
      "    SSLPSH\t55\n",
      "    SGVRHM\t51\n",
      "    GVLTLY\t24\n",
      "    LGELQS\t23\n",
      "    SMGQGR\t9\n",
      "    GLPGMT\t9\n",
      "    IHDKNI\t5\n",
      "    SASPEK\t5\n",
      "    ASSPEK\t5\n",
      "    ENYQAM\t5\n",
      "Time for index table init: 0h 0m 0s 373ms\n",
      "Process prefiltering step 1 of 1\n",
      "\n",
      "k-mer similarity threshold: 136\n",
      "Starting prefiltering scores calculation (step 1 of 1)\n",
      "Query db start 1 to 1483\n",
      "Target db start 1 to 1483\n",
      "[=================================================================] 1.48K 0s 19ms\n",
      "\n",
      "24.103790 k-mers per position\n",
      "219 DB matches per sequence\n",
      "0 overflows\n",
      "0 queries produce too many hits (truncated result)\n",
      "2 sequences passed prefiltering per query sequence\n",
      "1 median result list length\n",
      "0 sequences with 0 size result lists\n",
      "Time for merging to pref_step1: 0h 0m 0s 3ms\n",
      "Time for processing: 0h 0m 0s 717ms\n",
      "align tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step1 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' -a 0 --alignment-mode 3 --alignment-output-mode 0 --wrapped-scoring 0 -e 0.001 --min-seq-id 0.3 --min-aln-len 0 --seq-id-mode 0 --alt-ali 0 -c 0.8 --cov-mode 1 --max-seq-len 65535 --comp-bias-corr 1 --comp-bias-corr-scale 1 --max-rejected 2147483647 --max-accept 2147483647 --add-self-matches 0 --db-load-mode 0 --pca substitution:1.100,context:1.400 --pcb substitution:4.100,context:5.800 --score-bias 0 --realign 0 --realign-score-bias -0.2 --realign-max-seqs 2147483647 --corr-score-weight 0 --gap-open aa:11,nucl:5 --gap-extend aa:1,nucl:2 --zdrop 40 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Compute score, coverage and sequence identity\n",
      "Query database size: 1483 type: Aminoacid\n",
      "Target database size: 1483 type: Aminoacid\n",
      "Calculation of alignments\n",
      "[=================================================================] 1.48K 0s 20ms\n",
      "Time for merging to aln_step1: 0h 0m 0s 3ms\n",
      "3282 alignments calculated\n",
      "1688 sequence pairs passed the thresholds (0.514321 of overall calculated)\n",
      "1.138233 hits per query sequence\n",
      "Time for processing: 0h 0m 0s 102ms\n",
      "clust tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step1 --cluster-mode 3 --max-iterations 1000 --similarity-type 2 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Clustering mode: Greedy Low Mem\n",
      "Total time: 0h 0m 0s 50ms\n",
      "\n",
      "Size of the sequence database: 1483\n",
      "Size of the alignment database: 1483\n",
      "Number of clusters: 1396\n",
      "\n",
      "Writing results 0h 0m 0s 0ms\n",
      "Time for merging to clu_step1: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 68ms\n",
      "createsubdb tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step2 -v 3 --subdb-mode 1 \n",
      "\n",
      "Time for merging to input_step2: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 13ms\n",
      "prefilter tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step2 tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step2 tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step2 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' --seed-sub-mat 'aa:VTML80.out,nucl:nucleotide.out' -s 5 -k 0 --k-score seq:2147483647,prof:2147483647 --alph-size aa:21,nucl:5 --max-seq-len 65535 --max-seqs 20 --split 0 --split-mode 2 --split-memory-limit 0 -c 0.8 --cov-mode 1 --comp-bias-corr 1 --comp-bias-corr-scale 1 --diag-score 1 --exact-kmer-matching 0 --mask 1 --mask-prob 0.9 --mask-lower-case 0 --min-ungapped-score 15 --add-self-matches 0 --spaced-kmer-mode 1 --db-load-mode 0 --pca substitution:1.100,context:1.400 --pcb substitution:4.100,context:5.800 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Query database size: 1396 type: Aminoacid\n",
      "Estimated memory consumption: 983M\n",
      "Target database size: 1396 type: Aminoacid\n",
      "Index table k-mer threshold: 118 at k-mer size 6 \n",
      "Index table: counting k-mers\n",
      "[=================================================================] 1.40K 0s 31ms\n",
      "Index table: Masked residues: 4780\n",
      "Index table: fill\n",
      "[=================================================================] 1.40K 0s 6ms\n",
      "Index statistics\n",
      "Entries:          295758\n",
      "DB size:          489 MB\n",
      "Avg k-mer size:   0.004621\n",
      "Top 10 k-mers\n",
      "    SSLPSH\t55\n",
      "    SGVRHM\t51\n",
      "    SSVLEN\t24\n",
      "    GVLTLY\t24\n",
      "    LGELQS\t23\n",
      "    GLPGMT\t9\n",
      "    SASPEK\t5\n",
      "    ASSPEK\t5\n",
      "    ENYQAM\t5\n",
      "    RELFHM\t5\n",
      "Time for index table init: 0h 0m 0s 352ms\n",
      "Process prefiltering step 1 of 1\n",
      "\n",
      "k-mer similarity threshold: 118\n",
      "Starting prefiltering scores calculation (step 1 of 1)\n",
      "Query db start 1 to 1396\n",
      "Target db start 1 to 1396\n",
      "[=================================================================] 1.40K 0s 88ms\n",
      "\n",
      "181.906698 k-mers per position\n",
      "395 DB matches per sequence\n",
      "0 overflows\n",
      "0 queries produce too many hits (truncated result)\n",
      "3 sequences passed prefiltering per query sequence\n",
      "2 median result list length\n",
      "0 sequences with 0 size result lists\n",
      "Time for merging to pref_step2: 0h 0m 0s 4ms\n",
      "Time for processing: 0h 0m 0s 781ms\n",
      "align tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step2 tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step2 tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step2 tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step2 --sub-mat 'aa:blosum62.out,nucl:nucleotide.out' -a 0 --alignment-mode 3 --alignment-output-mode 0 --wrapped-scoring 0 -e 0.001 --min-seq-id 0.3 --min-aln-len 0 --seq-id-mode 0 --alt-ali 0 -c 0.8 --cov-mode 1 --max-seq-len 65535 --comp-bias-corr 1 --comp-bias-corr-scale 1 --max-rejected 2147483647 --max-accept 2147483647 --add-self-matches 0 --db-load-mode 0 --pca substitution:1.100,context:1.400 --pcb substitution:4.100,context:5.800 --score-bias 0 --realign 0 --realign-score-bias -0.2 --realign-max-seqs 2147483647 --corr-score-weight 0 --gap-open aa:11,nucl:5 --gap-extend aa:1,nucl:2 --zdrop 40 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Compute score, coverage and sequence identity\n",
      "Query database size: 1396 type: Aminoacid\n",
      "Target database size: 1396 type: Aminoacid\n",
      "Calculation of alignments\n",
      "[=================================================================] 1.40K 0s 33ms\n",
      "Time for merging to aln_step2: 0h 0m 0s 3ms\n",
      "4453 alignments calculated\n",
      "1490 sequence pairs passed the thresholds (0.334606 of overall calculated)\n",
      "1.067335 hits per query sequence\n",
      "Time for processing: 0h 0m 0s 158ms\n",
      "clust tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step2 tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step2 tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step2 --cluster-mode 3 --max-iterations 1000 --similarity-type 2 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Clustering mode: Greedy Low Mem\n",
      "Total time: 0h 0m 0s 11ms\n",
      "\n",
      "Size of the sequence database: 1396\n",
      "Size of the alignment database: 1396\n",
      "Number of clusters: 1346\n",
      "\n",
      "Writing results 0h 0m 0s 0ms\n",
      "Time for merging to clu_step2: 0h 0m 0s 0ms\n",
      "Time for processing: 0h 0m 0s 64ms\n",
      "mergeclusters tmp/2979816542525769739/input tmp/2979816542525769739/clu tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_redundancy tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step0 tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step1 tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step2 --threads 64 --compressed 0 -v 3 \n",
      "\n",
      "Clustering step 1\n",
      "[=================================================================] 1.62K 0s 27ms\n",
      "Clustering step 2\n",
      "[=================================================================] 1.48K 0s 64ms\n",
      "Clustering step 3\n",
      "[=================================================================] 1.40K 0s 76ms\n",
      "Clustering step 4\n",
      "[=================================================================] 1.35K 0s 77ms\n",
      "Write merged clustering\n",
      "[=================================================================] 5.46K 0s 224ms\n",
      "Time for merging to clu: 0h 0m 0s 1ms\n",
      "Time for processing: 0h 0m 0s 230ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_redundancy -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step_redundancy_h -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step0 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step0 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step0 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step1 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step1 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step1 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/pref_step2 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/aln_step2 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/clu_step2 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step1_h -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step2 -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_tmp/9750242695871741645/input_step2_h -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "createtsv tmp/2979816542525769739/input tmp/2979816542525769739/input tmp/2979816542525769739/clu tmp/2979816542525769739/cluster.tsv --threads 64 -v 3 \n",
      "\n",
      "Time for merging to cluster.tsv: 0h 0m 0s 2ms\n",
      "Time for processing: 0h 0m 0s 108ms\n",
      "result2repseq tmp/2979816542525769739/input tmp/2979816542525769739/clu tmp/2979816542525769739/clu_rep --db-load-mode 0 --compressed 0 --threads 64 -v 3 \n",
      "\n",
      "[=================================================================] 1.35K 0s 11ms\n",
      "Time for merging to clu_rep: 0h 0m 0s 26ms\n",
      "Time for processing: 0h 0m 0s 137ms\n",
      "result2flat tmp/2979816542525769739/input tmp/2979816542525769739/input tmp/2979816542525769739/clu_rep tmp/2979816542525769739/rep_seq.fasta --use-fasta-header -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 6ms\n",
      "createseqfiledb tmp/2979816542525769739/input tmp/2979816542525769739/clu tmp/2979816542525769739/clu_seqs --threads 64 -v 3 \n",
      "\n",
      "[=================================================================] 1.35K 0s 2ms\n",
      "Time for merging to clu_seqs: 0h 0m 0s 7ms\n",
      "Time for processing: 0h 0m 0s 145ms\n",
      "result2flat tmp/2979816542525769739/input tmp/2979816542525769739/input tmp/2979816542525769739/clu_seqs tmp/2979816542525769739/all_seqs.fasta -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 21ms\n",
      "rmdb tmp/2979816542525769739/input -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/input_h -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu_seqs -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n",
      "rmdb tmp/2979816542525769739/clu_rep -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 0ms\n",
      "rmdb tmp/2979816542525769739/clu -v 3 \n",
      "\n",
      "Time for processing: 0h 0m 0s 1ms\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/30/23 20:28:29] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Done clustering!                                                      <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1283\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1283</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/30/23 20:28:29]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Done clustering!                                                      \u001b]8;id=746807;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=352556;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1283\u001b\\\u001b[2m1283\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Randomly splitting clusters into ratios: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.8</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.1</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.1</span><span style=\"color: #808000; text-decoration-color: #808000\">...</span>               <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1187\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1187</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Randomly splitting clusters into ratios: \u001b[1;36m0.8\u001b[0m \u001b[1;36m0.1\u001b[0m \u001b[1;36m0.1\u001b[0m\u001b[33m...\u001b[0m               \u001b]8;id=197548;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=527494;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1187\u001b\\\u001b[2m1187\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Proportionally-derived dataset splits of sizes: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1078</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">134</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">134</span>          <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1114\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1114</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Proportionally-derived dataset splits of sizes: \u001b[1;36m1078\u001b[0m \u001b[1;36m134\u001b[0m \u001b[1;36m134\u001b[0m          \u001b]8;id=474483;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=290449;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1114\u001b\\\u001b[2m1114\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Done splitting clusters                                               <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1196\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1196</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Done splitting clusters                                               \u001b]8;id=211776;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=542120;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1196\u001b\\\u001b[2m1196\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of chains:  1346\n"
     ]
    }
   ],
   "source": [
    "pdb_manager.cluster(update=True, overwrite=True)\n",
    "print(\"Number of chains: \", len(pdb_manager.df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/30/23 20:42:36] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Randomly splitting clusters into ratios: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.8</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.1</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.1</span><span style=\"color: #808000; text-decoration-color: #808000\">...</span>               <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1187\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1187</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/30/23 20:42:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Randomly splitting clusters into ratios: \u001b[1;36m0.8\u001b[0m \u001b[1;36m0.1\u001b[0m \u001b[1;36m0.1\u001b[0m\u001b[33m...\u001b[0m               \u001b]8;id=351831;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=515434;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1187\u001b\\\u001b[2m1187\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Proportionally-derived dataset splits of sizes: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1078</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">134</span> <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">134</span>          <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1114\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1114</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Proportionally-derived dataset splits of sizes: \u001b[1;36m1078\u001b[0m \u001b[1;36m134\u001b[0m \u001b[1;36m134\u001b[0m          \u001b]8;id=781485;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=621063;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1114\u001b\\\u001b[2m1114\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Done splitting clusters                                               <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1196\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1196</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Done splitting clusters                                               \u001b]8;id=416506;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=421514;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1196\u001b\\\u001b[2m1196\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "splits = pdb_manager.split_clusters(pdb_manager.df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>279258</th>\n",
       "      <td>4pqa_A</td>\n",
       "      <td>4pqa</td>\n",
       "      <td>A</td>\n",
       "      <td>381</td>\n",
       "      <td>protein</td>\n",
       "      <td>Succinyl-diaminopimelate desuccinylase</td>\n",
       "      <td>MTETQSLELAKELISRPSVTPDDRDCQKLLAERLHKIGFAAEELHF...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4, X8Z, ZN]</td>\n",
       "      <td>Neisseria meningitidis</td>\n",
       "      <td>1.780</td>\n",
       "      <td>2014-03-01</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>486946</th>\n",
       "      <td>6hpf_A</td>\n",
       "      <td>6hpf</td>\n",
       "      <td>A</td>\n",
       "      <td>312</td>\n",
       "      <td>protein</td>\n",
       "      <td>endo-b-mannanase</td>\n",
       "      <td>APSTTPVNEKATDAAKNLLSYLVEQAANGVTLSGQQDLESAQWVSD...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[ACY, BMA, CL, GLA, MAN, NAG, SO4]</td>\n",
       "      <td>Yunnania penicillata</td>\n",
       "      <td>1.360</td>\n",
       "      <td>2018-09-20</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93196</th>\n",
       "      <td>2hm7_A</td>\n",
       "      <td>2hm7</td>\n",
       "      <td>A</td>\n",
       "      <td>310</td>\n",
       "      <td>protein</td>\n",
       "      <td>Carboxylesterase</td>\n",
       "      <td>MPLDPVIQQVLDQLNRMPAPDYKHLSAQQFRSQQSLFPPVKKEPVA...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>Alicyclobacillus acidocaldarius</td>\n",
       "      <td>2.000</td>\n",
       "      <td>2006-07-11</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>209977</th>\n",
       "      <td>3u62_A</td>\n",
       "      <td>3u62</td>\n",
       "      <td>A</td>\n",
       "      <td>253</td>\n",
       "      <td>protein</td>\n",
       "      <td>Shikimate dehydrogenase</td>\n",
       "      <td>MKFCIIGYPVRHSISPRLYNEYFKRAGMNHSYGMEEIPPESFDTEI...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>Thermotoga maritima</td>\n",
       "      <td>1.450</td>\n",
       "      <td>2011-10-12</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75453</th>\n",
       "      <td>2b1y_A</td>\n",
       "      <td>2b1y</td>\n",
       "      <td>A</td>\n",
       "      <td>104</td>\n",
       "      <td>protein</td>\n",
       "      <td>hypothetical protein Atu1913</td>\n",
       "      <td>MARPNFRYTHYDLKELRAGTTLEISLSSVNNVRLMTGANFQRFTEL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>Agrobacterium tumefaciens str.</td>\n",
       "      <td>1.800</td>\n",
       "      <td>2005-09-16</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>510388</th>\n",
       "      <td>6mgc_A</td>\n",
       "      <td>6mgc</td>\n",
       "      <td>A</td>\n",
       "      <td>361</td>\n",
       "      <td>protein</td>\n",
       "      <td>Capsule polysaccharide export protein KpsC</td>\n",
       "      <td>MGIGIYSPGIWRIPHLEKFLAQPCQKLSLLRPVPQEVNAIAVWGHR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[C5P, CL, SO4]</td>\n",
       "      <td>Escherichia coli O1:K1 / APEC</td>\n",
       "      <td>1.350</td>\n",
       "      <td>2018-09-13</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>772192</th>\n",
       "      <td>8dv0_A</td>\n",
       "      <td>8dv0</td>\n",
       "      <td>A</td>\n",
       "      <td>199</td>\n",
       "      <td>protein</td>\n",
       "      <td>Dephospho-CoA kinase</td>\n",
       "      <td>MAHHHHHHMLAIGITGSYASGKTFILDYLAEKGYKTFCADRCIKEL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[EDO, SO4]</td>\n",
       "      <td>Rickettsia felis URRWXCal2</td>\n",
       "      <td>1.400</td>\n",
       "      <td>2022-07-27</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>471269</th>\n",
       "      <td>6evl_A</td>\n",
       "      <td>6evl</td>\n",
       "      <td>A</td>\n",
       "      <td>102</td>\n",
       "      <td>protein</td>\n",
       "      <td>Prolyl 4-hydroxylase subunit alpha-2</td>\n",
       "      <td>MHHHHHHMLSVDDCFGMGRSAYNEGDYYHTVLWMEQVLKQLDAGEE...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[DMS, SO4]</td>\n",
       "      <td>Homo sapiens</td>\n",
       "      <td>1.870</td>\n",
       "      <td>2017-11-02</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>151770</th>\n",
       "      <td>3exq_A</td>\n",
       "      <td>3exq</td>\n",
       "      <td>A</td>\n",
       "      <td>161</td>\n",
       "      <td>protein</td>\n",
       "      <td>NUDIX family hydrolase</td>\n",
       "      <td>MSLTRTQPVELVTMVMVTDPETQRVLVEDKVNVPWKAGHSFPGGHV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>Lactobacillus brevis ATCC 367</td>\n",
       "      <td>2.000</td>\n",
       "      <td>2008-10-16</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>432667</th>\n",
       "      <td>5wl5_A</td>\n",
       "      <td>5wl5</td>\n",
       "      <td>A</td>\n",
       "      <td>223</td>\n",
       "      <td>protein</td>\n",
       "      <td>Engineered Chalcone Isomerase ancR5</td>\n",
       "      <td>SHGMAVTKVTVDGIEFPPTITPPGSSKSLTLLGAGVRGIEIEAIQI...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[CL, SO4]</td>\n",
       "      <td>unidentified</td>\n",
       "      <td>1.513</td>\n",
       "      <td>2017-07-25</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1078 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   pdb chain  length molecule_type  \\\n",
       "279258  4pqa_A  4pqa     A     381       protein   \n",
       "486946  6hpf_A  6hpf     A     312       protein   \n",
       "93196   2hm7_A  2hm7     A     310       protein   \n",
       "209977  3u62_A  3u62     A     253       protein   \n",
       "75453   2b1y_A  2b1y     A     104       protein   \n",
       "...        ...   ...   ...     ...           ...   \n",
       "510388  6mgc_A  6mgc     A     361       protein   \n",
       "772192  8dv0_A  8dv0     A     199       protein   \n",
       "471269  6evl_A  6evl     A     102       protein   \n",
       "151770  3exq_A  3exq     A     161       protein   \n",
       "432667  5wl5_A  5wl5     A     223       protein   \n",
       "\n",
       "                                              name  \\\n",
       "279258      Succinyl-diaminopimelate desuccinylase   \n",
       "486946                            endo-b-mannanase   \n",
       "93196                             Carboxylesterase   \n",
       "209977                     Shikimate dehydrogenase   \n",
       "75453                 hypothetical protein Atu1913   \n",
       "...                                            ...   \n",
       "510388  Capsule polysaccharide export protein KpsC   \n",
       "772192                        Dephospho-CoA kinase   \n",
       "471269        Prolyl 4-hydroxylase subunit alpha-2   \n",
       "151770                      NUDIX family hydrolase   \n",
       "432667         Engineered Chalcone Isomerase ancR5   \n",
       "\n",
       "                                                 sequence split  n_chains  \\\n",
       "279258  MTETQSLELAKELISRPSVTPDDRDCQKLLAERLHKIGFAAEELHF...   N/A         1   \n",
       "486946  APSTTPVNEKATDAAKNLLSYLVEQAANGVTLSGQQDLESAQWVSD...   N/A         1   \n",
       "93196   MPLDPVIQQVLDQLNRMPAPDYKHLSAQQFRSQQSLFPPVKKEPVA...   N/A         1   \n",
       "209977  MKFCIIGYPVRHSISPRLYNEYFKRAGMNHSYGMEEIPPESFDTEI...   N/A         1   \n",
       "75453   MARPNFRYTHYDLKELRAGTTLEISLSSVNNVRLMTGANFQRFTEL...   N/A         1   \n",
       "...                                                   ...   ...       ...   \n",
       "510388  MGIGIYSPGIWRIPHLEKFLAQPCQKLSLLRPVPQEVNAIAVWGHR...   N/A         1   \n",
       "772192  MAHHHHHHMLAIGITGSYASGKTFILDYLAEKGYKTFCADRCIKEL...   N/A         1   \n",
       "471269  MHHHHHHMLSVDDCFGMGRSAYNEGDYYHTVLWMEQVLKQLDAGEE...   N/A         1   \n",
       "151770  MSLTRTQPVELVTMVMVTDPETQRVLVEDKVNVPWKAGHSFPGGHV...   N/A         1   \n",
       "432667  SHGMAVTKVTVDGIEFPPTITPPGSSKSLTLLGAGVRGIEIEAIQI...   N/A         1   \n",
       "\n",
       "                                   ligands                           source  \\\n",
       "279258                      [SO4, X8Z, ZN]           Neisseria meningitidis   \n",
       "486946  [ACY, BMA, CL, GLA, MAN, NAG, SO4]             Yunnania penicillata   \n",
       "93196                                [SO4]  Alicyclobacillus acidocaldarius   \n",
       "209977                               [SO4]              Thermotoga maritima   \n",
       "75453                                [SO4]   Agrobacterium tumefaciens str.   \n",
       "...                                    ...                              ...   \n",
       "510388                      [C5P, CL, SO4]    Escherichia coli O1:K1 / APEC   \n",
       "772192                          [EDO, SO4]       Rickettsia felis URRWXCal2   \n",
       "471269                          [DMS, SO4]                     Homo sapiens   \n",
       "151770                               [SO4]    Lactobacillus brevis ATCC 367   \n",
       "432667                           [CL, SO4]                     unidentified   \n",
       "\n",
       "        resolution deposition_date experiment_type  pdb_file_available  \n",
       "279258       1.780      2014-03-01     diffraction                True  \n",
       "486946       1.360      2018-09-20     diffraction                True  \n",
       "93196        2.000      2006-07-11     diffraction                True  \n",
       "209977       1.450      2011-10-12     diffraction                True  \n",
       "75453        1.800      2005-09-16     diffraction                True  \n",
       "...            ...             ...             ...                 ...  \n",
       "510388       1.350      2018-09-13     diffraction                True  \n",
       "772192       1.400      2022-07-27     diffraction                True  \n",
       "471269       1.870      2017-11-02     diffraction                True  \n",
       "151770       2.000      2008-10-16     diffraction                True  \n",
       "432667       1.513      2017-07-25     diffraction                True  \n",
       "\n",
       "[1078 rows x 15 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "splits[\"train\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>131877</th>\n",
       "      <td>2z0z_A</td>\n",
       "      <td>2z0z</td>\n",
       "      <td>A</td>\n",
       "      <td>194</td>\n",
       "      <td>protein</td>\n",
       "      <td>Putative uncharacterized protein TTHA1799</td>\n",
       "      <td>MWAFPERFEGRHVRLEPLALAHLPAFLRHYDPEVYRFLSRAPVAPT...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>Thermus thermophilus</td>\n",
       "      <td>2.000</td>\n",
       "      <td>2007-05-07</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43204</th>\n",
       "      <td>1py0_A</td>\n",
       "      <td>1py0</td>\n",
       "      <td>A</td>\n",
       "      <td>125</td>\n",
       "      <td>protein</td>\n",
       "      <td>Pseudoazurin</td>\n",
       "      <td>ASENIEVHMLNKGAEGAMVFEPAYIKANPGDTVTFIPVDKGHNVES...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4, Y1, YMA, ZN]</td>\n",
       "      <td>Alcaligenes faecalis</td>\n",
       "      <td>2.000</td>\n",
       "      <td>2003-07-07</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4961</th>\n",
       "      <td>1bx0_A</td>\n",
       "      <td>1bx0</td>\n",
       "      <td>A</td>\n",
       "      <td>314</td>\n",
       "      <td>protein</td>\n",
       "      <td>PROTEIN (FERREDOXIN:NADP+ OXIDOREDUCTASE)</td>\n",
       "      <td>QIASDVEAPPPAPAKVEKHSKKMEEGITVNKFKPKTPYVGRCLLNT...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[FAD, PO4, SO4]</td>\n",
       "      <td>Spinacia oleracea</td>\n",
       "      <td>1.900</td>\n",
       "      <td>1998-10-10</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>436695</th>\n",
       "      <td>5xj5_A</td>\n",
       "      <td>5xj5</td>\n",
       "      <td>A</td>\n",
       "      <td>201</td>\n",
       "      <td>protein</td>\n",
       "      <td>Glycerol-3-phosphate acyltransferase</td>\n",
       "      <td>MGSALFLVIFAYLLGSITFGEVIAKLKGVDLRNVGSGNVGATNVTR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[78M, FME, PGE, SO4]</td>\n",
       "      <td>Aquifex aeolicus</td>\n",
       "      <td>1.481</td>\n",
       "      <td>2017-04-30</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>159885</th>\n",
       "      <td>3hbm_A</td>\n",
       "      <td>3hbm</td>\n",
       "      <td>A</td>\n",
       "      <td>282</td>\n",
       "      <td>protein</td>\n",
       "      <td>UDP-sugar hydrolase</td>\n",
       "      <td>MKVLFRSDSSSQIGFGHIKRDLVLAKQYSDVSFACLPLEGSLIDEI...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>Campylobacter jejuni subsp. jejuni</td>\n",
       "      <td>1.800</td>\n",
       "      <td>2009-05-04</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>422066</th>\n",
       "      <td>5uqj_A</td>\n",
       "      <td>5uqj</td>\n",
       "      <td>A</td>\n",
       "      <td>221</td>\n",
       "      <td>protein</td>\n",
       "      <td>U6 snRNA phosphodiesterase</td>\n",
       "      <td>GMSRFWRSFTYFEWRPTPAIHRQLQKIICKYKETFMKQEYTNPYQL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[ACT, GOL, SO4]</td>\n",
       "      <td>Saccharomyces cerevisiae</td>\n",
       "      <td>1.800</td>\n",
       "      <td>2017-02-08</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158414</th>\n",
       "      <td>3gy9_A</td>\n",
       "      <td>3gy9</td>\n",
       "      <td>A</td>\n",
       "      <td>150</td>\n",
       "      <td>protein</td>\n",
       "      <td>GCN5-related N-acetyltransferase</td>\n",
       "      <td>GMDVTIERVNDFDGYNWLPLLAKSSQEGFQLVERMLRNRREESFQE...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[COA, GOL, SO4]</td>\n",
       "      <td>Exiguobacterium sibiricum 255-15</td>\n",
       "      <td>1.520</td>\n",
       "      <td>2009-04-03</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>540484</th>\n",
       "      <td>6qu6_A</td>\n",
       "      <td>6qu6</td>\n",
       "      <td>A</td>\n",
       "      <td>212</td>\n",
       "      <td>protein</td>\n",
       "      <td>Fiber</td>\n",
       "      <td>MRGSHHHHHHGSGDLVAWNKKDDRRTLWTTPDTSPNCKMSTEKDSK...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[EDO, PEG, SIA, SLB, SO4]</td>\n",
       "      <td>Human adenovirus 26</td>\n",
       "      <td>1.030</td>\n",
       "      <td>2019-02-26</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231831</th>\n",
       "      <td>4cg3_A</td>\n",
       "      <td>4cg3</td>\n",
       "      <td>A</td>\n",
       "      <td>313</td>\n",
       "      <td>protein</td>\n",
       "      <td>CUTINASE</td>\n",
       "      <td>MKYLLPTAAAGLLLLAAQPAMAMDIGINSDPANPYERGPNPTDALL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>THERMOBIFIDA FUSCA</td>\n",
       "      <td>1.550</td>\n",
       "      <td>2013-11-20</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>610378</th>\n",
       "      <td>7a1h_A</td>\n",
       "      <td>7a1h</td>\n",
       "      <td>A</td>\n",
       "      <td>64</td>\n",
       "      <td>protein</td>\n",
       "      <td>Subtilisin-chymotrypsin inhibitor-2A</td>\n",
       "      <td>MKTEWPELVGKSVEEAKKVILQDKPEAQIIVLPVGTIVTMEYRIDR...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>Hordeum vulgare</td>\n",
       "      <td>1.900</td>\n",
       "      <td>2020-08-13</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>134 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   pdb chain  length molecule_type  \\\n",
       "131877  2z0z_A  2z0z     A     194       protein   \n",
       "43204   1py0_A  1py0     A     125       protein   \n",
       "4961    1bx0_A  1bx0     A     314       protein   \n",
       "436695  5xj5_A  5xj5     A     201       protein   \n",
       "159885  3hbm_A  3hbm     A     282       protein   \n",
       "...        ...   ...   ...     ...           ...   \n",
       "422066  5uqj_A  5uqj     A     221       protein   \n",
       "158414  3gy9_A  3gy9     A     150       protein   \n",
       "540484  6qu6_A  6qu6     A     212       protein   \n",
       "231831  4cg3_A  4cg3     A     313       protein   \n",
       "610378  7a1h_A  7a1h     A      64       protein   \n",
       "\n",
       "                                             name  \\\n",
       "131877  Putative uncharacterized protein TTHA1799   \n",
       "43204                                Pseudoazurin   \n",
       "4961    PROTEIN (FERREDOXIN:NADP+ OXIDOREDUCTASE)   \n",
       "436695       Glycerol-3-phosphate acyltransferase   \n",
       "159885                        UDP-sugar hydrolase   \n",
       "...                                           ...   \n",
       "422066                 U6 snRNA phosphodiesterase   \n",
       "158414           GCN5-related N-acetyltransferase   \n",
       "540484                                      Fiber   \n",
       "231831                                   CUTINASE   \n",
       "610378       Subtilisin-chymotrypsin inhibitor-2A   \n",
       "\n",
       "                                                 sequence split  n_chains  \\\n",
       "131877  MWAFPERFEGRHVRLEPLALAHLPAFLRHYDPEVYRFLSRAPVAPT...   N/A         1   \n",
       "43204   ASENIEVHMLNKGAEGAMVFEPAYIKANPGDTVTFIPVDKGHNVES...   N/A         1   \n",
       "4961    QIASDVEAPPPAPAKVEKHSKKMEEGITVNKFKPKTPYVGRCLLNT...   N/A         1   \n",
       "436695  MGSALFLVIFAYLLGSITFGEVIAKLKGVDLRNVGSGNVGATNVTR...   N/A         1   \n",
       "159885  MKVLFRSDSSSQIGFGHIKRDLVLAKQYSDVSFACLPLEGSLIDEI...   N/A         1   \n",
       "...                                                   ...   ...       ...   \n",
       "422066  GMSRFWRSFTYFEWRPTPAIHRQLQKIICKYKETFMKQEYTNPYQL...   N/A         1   \n",
       "158414  GMDVTIERVNDFDGYNWLPLLAKSSQEGFQLVERMLRNRREESFQE...   N/A         1   \n",
       "540484  MRGSHHHHHHGSGDLVAWNKKDDRRTLWTTPDTSPNCKMSTEKDSK...   N/A         1   \n",
       "231831  MKYLLPTAAAGLLLLAAQPAMAMDIGINSDPANPYERGPNPTDALL...   N/A         1   \n",
       "610378  MKTEWPELVGKSVEEAKKVILQDKPEAQIIVLPVGTIVTMEYRIDR...   N/A         1   \n",
       "\n",
       "                          ligands                              source  \\\n",
       "131877                      [SO4]                Thermus thermophilus   \n",
       "43204          [SO4, Y1, YMA, ZN]                Alcaligenes faecalis   \n",
       "4961              [FAD, PO4, SO4]                   Spinacia oleracea   \n",
       "436695       [78M, FME, PGE, SO4]                    Aquifex aeolicus   \n",
       "159885                      [SO4]  Campylobacter jejuni subsp. jejuni   \n",
       "...                           ...                                 ...   \n",
       "422066            [ACT, GOL, SO4]            Saccharomyces cerevisiae   \n",
       "158414            [COA, GOL, SO4]    Exiguobacterium sibiricum 255-15   \n",
       "540484  [EDO, PEG, SIA, SLB, SO4]                 Human adenovirus 26   \n",
       "231831                      [SO4]                  THERMOBIFIDA FUSCA   \n",
       "610378                      [SO4]                     Hordeum vulgare   \n",
       "\n",
       "        resolution deposition_date experiment_type  pdb_file_available  \n",
       "131877       2.000      2007-05-07     diffraction                True  \n",
       "43204        2.000      2003-07-07     diffraction                True  \n",
       "4961         1.900      1998-10-10     diffraction                True  \n",
       "436695       1.481      2017-04-30     diffraction                True  \n",
       "159885       1.800      2009-05-04     diffraction                True  \n",
       "...            ...             ...             ...                 ...  \n",
       "422066       1.800      2017-02-08     diffraction                True  \n",
       "158414       1.520      2009-04-03     diffraction                True  \n",
       "540484       1.030      2019-02-26     diffraction                True  \n",
       "231831       1.550      2013-11-20     diffraction                True  \n",
       "610378       1.900      2020-08-13     diffraction                True  \n",
       "\n",
       "[134 rows x 15 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "splits[\"val\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>n_chains</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>137068</th>\n",
       "      <td>3al2_A</td>\n",
       "      <td>3al2</td>\n",
       "      <td>A</td>\n",
       "      <td>235</td>\n",
       "      <td>protein</td>\n",
       "      <td>DNA topoisomerase 2-binding protein 1</td>\n",
       "      <td>GPLGSLKKQYIFQLSSLNPQERIDYCHLIEKLGGLVIEKQCFDPTC...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4]</td>\n",
       "      <td>Homo sapiens</td>\n",
       "      <td>2.000</td>\n",
       "      <td>2010-07-22</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25399</th>\n",
       "      <td>1jov_A</td>\n",
       "      <td>1jov</td>\n",
       "      <td>A</td>\n",
       "      <td>270</td>\n",
       "      <td>protein</td>\n",
       "      <td>HI1317</td>\n",
       "      <td>MKTTLLKTLTPELHLVQHNDIPVLHLKHAVGTAKISLQGAQLISWK...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[SO4, TRS]</td>\n",
       "      <td>Haemophilus influenzae</td>\n",
       "      <td>1.570</td>\n",
       "      <td>2001-07-31</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>381776</th>\n",
       "      <td>5kta_A</td>\n",
       "      <td>5kta</td>\n",
       "      <td>A</td>\n",
       "      <td>188</td>\n",
       "      <td>protein</td>\n",
       "      <td>FdhC</td>\n",
       "      <td>MVNFNLKANTTYLRLVEENDAEFICTLRNNDKLNTYISKSTGDIKS...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[CSX, DMS, SO4]</td>\n",
       "      <td>Acinetobacter nosocomialis</td>\n",
       "      <td>1.890</td>\n",
       "      <td>2016-07-11</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>165255</th>\n",
       "      <td>3iwl_A</td>\n",
       "      <td>3iwl</td>\n",
       "      <td>A</td>\n",
       "      <td>68</td>\n",
       "      <td>protein</td>\n",
       "      <td>Copper transport protein ATOX1</td>\n",
       "      <td>MPKHEFSVDMTCGGCAEAVSRVLNKLGGVKYDIDLPNKKVCIESEH...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[PT, SO4, TCE]</td>\n",
       "      <td>Homo sapiens</td>\n",
       "      <td>1.600</td>\n",
       "      <td>2009-09-02</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228154</th>\n",
       "      <td>4bi7_A</td>\n",
       "      <td>4bi7</td>\n",
       "      <td>A</td>\n",
       "      <td>257</td>\n",
       "      <td>protein</td>\n",
       "      <td>TRIOSEPHOSPHATE ISOMERASE</td>\n",
       "      <td>MPARRPFIGGNFKCNGSLDFIKSHVAAIAAHKIPDSVDVVIAPSAV...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[PGA, SO4]</td>\n",
       "      <td>GIARDIA INTESTINALIS</td>\n",
       "      <td>1.600</td>\n",
       "      <td>2013-04-09</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>454968</th>\n",
       "      <td>6btd_A</td>\n",
       "      <td>6btd</td>\n",
       "      <td>A</td>\n",
       "      <td>222</td>\n",
       "      <td>protein</td>\n",
       "      <td>Fuculose phosphate aldolase</td>\n",
       "      <td>MLLQKEREEIVAYGKKMISSGLTKGTGGNISIFNREQGLVAISPSG...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[MN, SO4]</td>\n",
       "      <td>Bacillus thuringiensis</td>\n",
       "      <td>1.551</td>\n",
       "      <td>2017-12-06</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>479055</th>\n",
       "      <td>6gg1_A</td>\n",
       "      <td>6gg1</td>\n",
       "      <td>A</td>\n",
       "      <td>154</td>\n",
       "      <td>protein</td>\n",
       "      <td>Interleukin-24</td>\n",
       "      <td>AFHFGPCRVEGVVPQELWEAFWAVRDTLQAQDNITDVRLLRAEVLQ...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[NI, SO4]</td>\n",
       "      <td>Homo sapiens</td>\n",
       "      <td>1.300</td>\n",
       "      <td>2018-05-02</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>696311</th>\n",
       "      <td>7q68_A</td>\n",
       "      <td>7q68</td>\n",
       "      <td>A</td>\n",
       "      <td>172</td>\n",
       "      <td>protein</td>\n",
       "      <td>Thioredoxin domain-containing protein</td>\n",
       "      <td>MAPLQPGDSFPANVVFSYIPPTGSLDLTVCGRPIEYNASEALAKGT...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[GOL, SO4]</td>\n",
       "      <td>Chaetomium thermophilum var. thermophilum DSM ...</td>\n",
       "      <td>1.750</td>\n",
       "      <td>2021-11-05</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269201</th>\n",
       "      <td>4n02_A</td>\n",
       "      <td>4n02</td>\n",
       "      <td>A</td>\n",
       "      <td>357</td>\n",
       "      <td>protein</td>\n",
       "      <td>Isopentenyl-diphosphate delta-isomerase</td>\n",
       "      <td>MRGSHHHHHHGSGSGSGIEGRITTNRKDEHILYALEQKSSYNSFDE...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[FNR, SO4]</td>\n",
       "      <td>Streptococcus pneumoniae</td>\n",
       "      <td>1.400</td>\n",
       "      <td>2013-09-30</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>475304</th>\n",
       "      <td>6fsg_A</td>\n",
       "      <td>6fsg</td>\n",
       "      <td>A</td>\n",
       "      <td>147</td>\n",
       "      <td>protein</td>\n",
       "      <td>Flavodoxin</td>\n",
       "      <td>SKLVMIFASMSGNTEEMADHIAGVIRETENEIEVIDIMDSPEASIL...</td>\n",
       "      <td>N/A</td>\n",
       "      <td>1</td>\n",
       "      <td>[FMN, GLC, SO4]</td>\n",
       "      <td>Bacillus cereus (strain ATCC 14579 / DSM 31 / ...</td>\n",
       "      <td>1.270</td>\n",
       "      <td>2018-02-19</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>134 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   pdb chain  length molecule_type  \\\n",
       "137068  3al2_A  3al2     A     235       protein   \n",
       "25399   1jov_A  1jov     A     270       protein   \n",
       "381776  5kta_A  5kta     A     188       protein   \n",
       "165255  3iwl_A  3iwl     A      68       protein   \n",
       "228154  4bi7_A  4bi7     A     257       protein   \n",
       "...        ...   ...   ...     ...           ...   \n",
       "454968  6btd_A  6btd     A     222       protein   \n",
       "479055  6gg1_A  6gg1     A     154       protein   \n",
       "696311  7q68_A  7q68     A     172       protein   \n",
       "269201  4n02_A  4n02     A     357       protein   \n",
       "475304  6fsg_A  6fsg     A     147       protein   \n",
       "\n",
       "                                           name  \\\n",
       "137068    DNA topoisomerase 2-binding protein 1   \n",
       "25399                                    HI1317   \n",
       "381776                                     FdhC   \n",
       "165255           Copper transport protein ATOX1   \n",
       "228154                TRIOSEPHOSPHATE ISOMERASE   \n",
       "...                                         ...   \n",
       "454968              Fuculose phosphate aldolase   \n",
       "479055                           Interleukin-24   \n",
       "696311    Thioredoxin domain-containing protein   \n",
       "269201  Isopentenyl-diphosphate delta-isomerase   \n",
       "475304                               Flavodoxin   \n",
       "\n",
       "                                                 sequence split  n_chains  \\\n",
       "137068  GPLGSLKKQYIFQLSSLNPQERIDYCHLIEKLGGLVIEKQCFDPTC...   N/A         1   \n",
       "25399   MKTTLLKTLTPELHLVQHNDIPVLHLKHAVGTAKISLQGAQLISWK...   N/A         1   \n",
       "381776  MVNFNLKANTTYLRLVEENDAEFICTLRNNDKLNTYISKSTGDIKS...   N/A         1   \n",
       "165255  MPKHEFSVDMTCGGCAEAVSRVLNKLGGVKYDIDLPNKKVCIESEH...   N/A         1   \n",
       "228154  MPARRPFIGGNFKCNGSLDFIKSHVAAIAAHKIPDSVDVVIAPSAV...   N/A         1   \n",
       "...                                                   ...   ...       ...   \n",
       "454968  MLLQKEREEIVAYGKKMISSGLTKGTGGNISIFNREQGLVAISPSG...   N/A         1   \n",
       "479055  AFHFGPCRVEGVVPQELWEAFWAVRDTLQAQDNITDVRLLRAEVLQ...   N/A         1   \n",
       "696311  MAPLQPGDSFPANVVFSYIPPTGSLDLTVCGRPIEYNASEALAKGT...   N/A         1   \n",
       "269201  MRGSHHHHHHGSGSGSGIEGRITTNRKDEHILYALEQKSSYNSFDE...   N/A         1   \n",
       "475304  SKLVMIFASMSGNTEEMADHIAGVIRETENEIEVIDIMDSPEASIL...   N/A         1   \n",
       "\n",
       "                ligands                                             source  \\\n",
       "137068            [SO4]                                       Homo sapiens   \n",
       "25399        [SO4, TRS]                             Haemophilus influenzae   \n",
       "381776  [CSX, DMS, SO4]                         Acinetobacter nosocomialis   \n",
       "165255   [PT, SO4, TCE]                                       Homo sapiens   \n",
       "228154       [PGA, SO4]                               GIARDIA INTESTINALIS   \n",
       "...                 ...                                                ...   \n",
       "454968        [MN, SO4]                             Bacillus thuringiensis   \n",
       "479055        [NI, SO4]                                       Homo sapiens   \n",
       "696311       [GOL, SO4]  Chaetomium thermophilum var. thermophilum DSM ...   \n",
       "269201       [FNR, SO4]                           Streptococcus pneumoniae   \n",
       "475304  [FMN, GLC, SO4]  Bacillus cereus (strain ATCC 14579 / DSM 31 / ...   \n",
       "\n",
       "        resolution deposition_date experiment_type  pdb_file_available  \n",
       "137068       2.000      2010-07-22     diffraction                True  \n",
       "25399        1.570      2001-07-31     diffraction                True  \n",
       "381776       1.890      2016-07-11     diffraction                True  \n",
       "165255       1.600      2009-09-02     diffraction                True  \n",
       "228154       1.600      2013-04-09     diffraction                True  \n",
       "...            ...             ...             ...                 ...  \n",
       "454968       1.551      2017-12-06     diffraction                True  \n",
       "479055       1.300      2018-05-02     diffraction                True  \n",
       "696311       1.750      2021-11-05     diffraction                True  \n",
       "269201       1.400      2013-09-30     diffraction                True  \n",
       "475304       1.270      2018-02-19     diffraction                True  \n",
       "\n",
       "[134 rows x 15 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "splits[\"test\"]"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, we can download and export our splits:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/30/23 20:43:24] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Downloading <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">994</span> PDB files<span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                          <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1476\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1476</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/30/23 20:43:24]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Downloading \u001b[1;36m994\u001b[0m PDB files\u001b[33m...\u001b[0m                                          \u001b]8;id=218820;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=866225;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1476\u001b\\\u001b[2m1476\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ca769f0ecae14350827ccd4f15ed3473",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/994 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pdb_manager.download_pdbs(\"./pdb\", splits=splits)\n",
    "pdb_manager.export_pdbs(\n",
    "    pdb_dir=\"./pdb\", splits=splits, max_num_chains_per_pdb_code=3\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# I/O\n",
    "\n",
    "We can write our selections as CSV or FASTA files, or download and write the relevant PDBs in our selection to disk:\n",
    "\n",
    "## CSV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/29/23 01:39:08] </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING </span> You are exporting a selection that contains <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">PDB</span><span style=\"font-weight: bold\">(</span>s<span style=\"font-weight: bold\">)</span> unavailable for  <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1611\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1611</span></a>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         download in PDB format: <span style=\"font-weight: bold\">[</span><span style=\"color: #008000; text-decoration-color: #008000\">'6t9m'</span><span style=\"font-weight: bold\">]</span>                                      <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/29/23 01:39:08]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m You are exporting a selection that contains \u001b[1;36m1\u001b[0m \u001b[1;35mPDB\u001b[0m\u001b[1m(\u001b[0ms\u001b[1m)\u001b[0m unavailable for  \u001b]8;id=291957;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=50045;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1611\u001b\\\u001b[2m1611\u001b[0m\u001b]8;;\u001b\\\n",
       "\u001b[2;36m                    \u001b[0m         download in PDB format: \u001b[1m[\u001b[0m\u001b[32m'6t9m'\u001b[0m\u001b[1m]\u001b[0m                                      \u001b[2m                \u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Writing selection <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">80</span> chains<span style=\"font-weight: bold\">)</span> to CSV file: tmp/test.csv               <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1626\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1626</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Writing selection \u001b[1m(\u001b[0m\u001b[1;36m80\u001b[0m chains\u001b[1m)\u001b[0m to CSV file: tmp/test.csv               \u001b]8;id=807593;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=273236;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1626\u001b\\\u001b[2m1626\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>pdb</th>\n",
       "      <th>chain</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>name</th>\n",
       "      <th>sequence</th>\n",
       "      <th>split</th>\n",
       "      <th>ligands</th>\n",
       "      <th>source</th>\n",
       "      <th>resolution</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>experiment_type</th>\n",
       "      <th>pdb_file_available</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1hhz_D</td>\n",
       "      <td>1hhz</td>\n",
       "      <td>D</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>CELL WALL PEPTIDE</td>\n",
       "      <td>AEKAA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['3FG', 'DAL', 'DVC', 'FGA', 'GHP', 'MLU', 'OM...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.99</td>\n",
       "      <td>2000-12-29</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1hhz_E</td>\n",
       "      <td>1hhz</td>\n",
       "      <td>E</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>CELL WALL PEPTIDE</td>\n",
       "      <td>AEKAA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['3FG', 'DAL', 'DVC', 'FGA', 'GHP', 'MLU', 'OM...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.99</td>\n",
       "      <td>2000-12-29</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1hhz_F</td>\n",
       "      <td>1hhz</td>\n",
       "      <td>F</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>CELL WALL PEPTIDE</td>\n",
       "      <td>AEKAA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['3FG', 'DAL', 'DVC', 'FGA', 'GHP', 'MLU', 'OM...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.99</td>\n",
       "      <td>2000-12-29</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1sha_B</td>\n",
       "      <td>1sha</td>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>PHOSPHOPEPTIDE A</td>\n",
       "      <td>YVPML</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['PTR']</td>\n",
       "      <td>Rous sarcoma virus</td>\n",
       "      <td>1.50</td>\n",
       "      <td>1992-08-18</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1skg_B</td>\n",
       "      <td>1skg</td>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>VAFRS</td>\n",
       "      <td>VAFRS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['MOH', 'SO4']</td>\n",
       "      <td>Daboia russellii pulchella; SYNTHETIC CONSTRUCT</td>\n",
       "      <td>1.21</td>\n",
       "      <td>2004-03-04</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>7kpu_B</td>\n",
       "      <td>7kpu</td>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>bisubstrate analogue (CMC-ACE-SER-GLY-ARG-GLY-...</td>\n",
       "      <td>SGRGK</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['ACE', 'BTB', 'GOL', 'NH2', 'SO4', 'WZG']</td>\n",
       "      <td>Homo sapiens; SYNTHETIC CONSTRUCT</td>\n",
       "      <td>1.43</td>\n",
       "      <td>2020-11-12</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>7oju_H</td>\n",
       "      <td>7oju</td>\n",
       "      <td>H</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>MVNAL Peptide</td>\n",
       "      <td>MVNAL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['CMC', 'GOL', 'P6G']</td>\n",
       "      <td>Chaetomium thermophilum (strain DSM 1495 / CBS...</td>\n",
       "      <td>1.10</td>\n",
       "      <td>2021-05-17</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>7pul_P</td>\n",
       "      <td>7pul</td>\n",
       "      <td>P</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>GLY-ALA-GLY-ALA-ALA</td>\n",
       "      <td>GAGAA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['CA', 'MG']</td>\n",
       "      <td>Enterococcus faecalis</td>\n",
       "      <td>1.40</td>\n",
       "      <td>2021-09-30</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>7x70_B</td>\n",
       "      <td>7x70</td>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>peptide</td>\n",
       "      <td>AVKLQ</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[]</td>\n",
       "      <td>Homo sapiens; SYNTHETIC CONSTRUCT</td>\n",
       "      <td>1.25</td>\n",
       "      <td>2022-03-08</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>7z5z_C</td>\n",
       "      <td>7z5z</td>\n",
       "      <td>C</td>\n",
       "      <td>5</td>\n",
       "      <td>protein</td>\n",
       "      <td>UDP-MurNAc-pentapeptide</td>\n",
       "      <td>AECAA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>['A9Z', 'DAL', 'FGA', 'GOL', 'MUB', 'UDP']</td>\n",
       "      <td>Weissella viridescens; SYNTHETIC CONSTRUCT</td>\n",
       "      <td>1.49</td>\n",
       "      <td>2022-03-10</td>\n",
       "      <td>diffraction</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>80 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id   pdb chain  length molecule_type  \\\n",
       "0   1hhz_D  1hhz     D       5       protein   \n",
       "1   1hhz_E  1hhz     E       5       protein   \n",
       "2   1hhz_F  1hhz     F       5       protein   \n",
       "3   1sha_B  1sha     B       5       protein   \n",
       "4   1skg_B  1skg     B       5       protein   \n",
       "..     ...   ...   ...     ...           ...   \n",
       "75  7kpu_B  7kpu     B       5       protein   \n",
       "76  7oju_H  7oju     H       5       protein   \n",
       "77  7pul_P  7pul     P       5       protein   \n",
       "78  7x70_B  7x70     B       5       protein   \n",
       "79  7z5z_C  7z5z     C       5       protein   \n",
       "\n",
       "                                                 name sequence  split  \\\n",
       "0                                   CELL WALL PEPTIDE    AEKAA    NaN   \n",
       "1                                   CELL WALL PEPTIDE    AEKAA    NaN   \n",
       "2                                   CELL WALL PEPTIDE    AEKAA    NaN   \n",
       "3                                    PHOSPHOPEPTIDE A    YVPML    NaN   \n",
       "4                                               VAFRS    VAFRS    NaN   \n",
       "..                                                ...      ...    ...   \n",
       "75  bisubstrate analogue (CMC-ACE-SER-GLY-ARG-GLY-...    SGRGK    NaN   \n",
       "76                                      MVNAL Peptide    MVNAL    NaN   \n",
       "77                                GLY-ALA-GLY-ALA-ALA    GAGAA    NaN   \n",
       "78                                            peptide    AVKLQ    NaN   \n",
       "79                            UDP-MurNAc-pentapeptide    AECAA    NaN   \n",
       "\n",
       "                                              ligands  \\\n",
       "0   ['3FG', 'DAL', 'DVC', 'FGA', 'GHP', 'MLU', 'OM...   \n",
       "1   ['3FG', 'DAL', 'DVC', 'FGA', 'GHP', 'MLU', 'OM...   \n",
       "2   ['3FG', 'DAL', 'DVC', 'FGA', 'GHP', 'MLU', 'OM...   \n",
       "3                                             ['PTR']   \n",
       "4                                      ['MOH', 'SO4']   \n",
       "..                                                ...   \n",
       "75         ['ACE', 'BTB', 'GOL', 'NH2', 'SO4', 'WZG']   \n",
       "76                              ['CMC', 'GOL', 'P6G']   \n",
       "77                                       ['CA', 'MG']   \n",
       "78                                                 []   \n",
       "79         ['A9Z', 'DAL', 'FGA', 'GOL', 'MUB', 'UDP']   \n",
       "\n",
       "                                               source  resolution  \\\n",
       "0                                                 NaN        0.99   \n",
       "1                                                 NaN        0.99   \n",
       "2                                                 NaN        0.99   \n",
       "3                                  Rous sarcoma virus        1.50   \n",
       "4     Daboia russellii pulchella; SYNTHETIC CONSTRUCT        1.21   \n",
       "..                                                ...         ...   \n",
       "75                  Homo sapiens; SYNTHETIC CONSTRUCT        1.43   \n",
       "76  Chaetomium thermophilum (strain DSM 1495 / CBS...        1.10   \n",
       "77                              Enterococcus faecalis        1.40   \n",
       "78                  Homo sapiens; SYNTHETIC CONSTRUCT        1.25   \n",
       "79         Weissella viridescens; SYNTHETIC CONSTRUCT        1.49   \n",
       "\n",
       "   deposition_date experiment_type  pdb_file_available  \n",
       "0       2000-12-29     diffraction                True  \n",
       "1       2000-12-29     diffraction                True  \n",
       "2       2000-12-29     diffraction                True  \n",
       "3       1992-08-18     diffraction                True  \n",
       "4       2004-03-04     diffraction                True  \n",
       "..             ...             ...                 ...  \n",
       "75      2020-11-12     diffraction                True  \n",
       "76      2021-05-17     diffraction                True  \n",
       "77      2021-09-30     diffraction                True  \n",
       "78      2022-03-08     diffraction                True  \n",
       "79      2022-03-10     diffraction                True  \n",
       "\n",
       "[80 rows x 14 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "os.makedirs(\"tmp/\", exist_ok=True)\n",
    "# Write selection to disk\n",
    "manager.to_csv(\"tmp/test.csv\")\n",
    "\n",
    "# Read selection from disk\n",
    "sel = pd.read_csv(\"tmp/test.csv\")\n",
    "sel"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## FASTA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/29/23 01:40:04] </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING </span> You are exporting a selection that contains <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">PDB</span><span style=\"font-weight: bold\">(</span>s<span style=\"font-weight: bold\">)</span> unavailable for  <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1611\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1611</span></a>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         download in PDB format: <span style=\"font-weight: bold\">[</span><span style=\"color: #008000; text-decoration-color: #008000\">'6t9m'</span><span style=\"font-weight: bold\">]</span>                                      <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/29/23 01:40:04]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m You are exporting a selection that contains \u001b[1;36m1\u001b[0m \u001b[1;35mPDB\u001b[0m\u001b[1m(\u001b[0ms\u001b[1m)\u001b[0m unavailable for  \u001b]8;id=749561;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=480055;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1611\u001b\\\u001b[2m1611\u001b[0m\u001b]8;;\u001b\\\n",
       "\u001b[2;36m                    \u001b[0m         download in PDB format: \u001b[1m[\u001b[0m\u001b[32m'6t9m'\u001b[0m\u001b[1m]\u001b[0m                                      \u001b[2m                \u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING </span> You are exporting a selection that contains <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">PDB</span><span style=\"font-weight: bold\">(</span>s<span style=\"font-weight: bold\">)</span> unavailable for  <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1611\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1611</span></a>\n",
       "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span>         download in PDB format: <span style=\"font-weight: bold\">[</span><span style=\"color: #008000; text-decoration-color: #008000\">'6t9m'</span><span style=\"font-weight: bold\">]</span>                                      <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">                </span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m You are exporting a selection that contains \u001b[1;36m1\u001b[0m \u001b[1;35mPDB\u001b[0m\u001b[1m(\u001b[0ms\u001b[1m)\u001b[0m unavailable for  \u001b]8;id=155718;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=990508;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1611\u001b\\\u001b[2m1611\u001b[0m\u001b]8;;\u001b\\\n",
       "\u001b[2;36m                    \u001b[0m         download in PDB format: \u001b[1m[\u001b[0m\u001b[32m'6t9m'\u001b[0m\u001b[1m]\u001b[0m                                      \u001b[2m                \u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'1hhz_D': 'AEKAA', '1hhz_E': 'AEKAA', '1hhz_F': 'AEKAA', '1sha_B': 'YVPML', '1skg_B': 'VAFRS', '1tjk_I': 'FLSTK', '2d5w_C': 'ASKPK', '2d5w_D': 'ASKTK', '3drj_B': 'AHAKA', '3hds_E': 'ASWSA', '3hds_F': 'ASWSA', '4j78_B': 'KTKLL', '4j82_C': 'KSHQE', '4j82_D': 'KSHQE', '4j84_C': 'ARKLD', '4j84_D': 'ARKLD', '4l9p_C': 'KCVVM', '4olr_A': 'YVVFV', '4olr_B': 'YVVFV', '4qxx_Z': 'GNLVS', '4v3i_B': 'DLTRP', '4x3o_C': 'PKKTG', '4zhb_B': 'VDAVN', '5ctv_C': 'AEKAA', '5ctv_E': 'AEKAA', '5n99_C': 'NQPWQ', '5n99_E': 'NQPWQ', '5n99_F': 'NQPWQ', '5n99_H': 'NQPWQ', '5n99_J': 'NQPWQ', '5n99_L': 'NQPWQ', '5n99_N': 'NQPWQ', '5n99_P': 'NQPWQ', '5n99_R': 'NQPWQ', '5n99_T': 'NQPWQ', '5n99_V': 'NQPWQ', '5n99_X': 'NQPWQ', '5nf0_F': 'GGGGG', '5njf_E': 'AAAAA', '5njf_F': 'AAAAA', '5onp_B': 'GAIIG', '5onq_B': 'GAIIG', '5r42_E': 'TPGVY', '5r43_E': 'TPGVY', '5r44_E': 'TPGVY', '5r45_E': 'TPGVY', '5r46_E': 'TPGVY', '5r47_E': 'TPGVY', '5r48_E': 'TPGVY', '5r49_E': 'TPGVY', '5r4a_E': 'TPGVY', '5r4b_E': 'TPGVY', '5r4c_E': 'TPGVY', '5r4d_D': 'GSWPW', '5r4d_E': 'TPGVY', '6ax4_C': 'PLHST', '6diy_A': 'YTFGQ', '6eaw_I': 'CTKSI', '6f4r_B': 'CARAY', '6f4s_B': 'CARAY', '6f4t_B': 'CARAY', '6fbb_P': 'SRSSP', '6rd2_C': 'TEDEL', '6rd2_D': 'TEDEL', '6slg_B': 'AALAF', '6t9m_BBB': 'GPAMK', '6y3m_P': 'QSYTV', '6z00_C': 'MVNAL', '6z00_D': 'MVNAL', '7ett_B': 'QFPFV', '7etu_B': 'SFPFT', '7etv_B': 'DFPFV', '7kd7_E': 'SGRGK', '7kd7_B': 'SGRGK', '7kpu_E': 'SGRGK', '7kpu_B': 'SGRGK', '7oju_H': 'MVNAL', '7pul_P': 'GAGAA', '7x70_B': 'AVKLQ', '7z5z_C': 'AECAA'}\n"
     ]
    }
   ],
   "source": [
    "from graphein.protein.utils import read_fasta\n",
    "# Write selection to a fasta file\n",
    "manager.to_fasta(\"tmp/test.fasta\")\n",
    "\n",
    "# Load selection from a fasta file\n",
    "fs = read_fasta(\"tmp/test.fasta\")\n",
    "print(fs)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Downloading PDBs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/29/23 01:40:19] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Downloading <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">55</span> PDB files<span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                           <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1456\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1456</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/29/23 01:40:19]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Downloading \u001b[1;36m55\u001b[0m PDB files\u001b[33m...\u001b[0m                                           \u001b]8;id=304186;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=254463;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1456\u001b\\\u001b[2m1456\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "54ed67640faa4c94b0f3c4e6f69294b3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/55 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "manager.download_pdbs(\"tmp/pdbs/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['5r46.pdb',\n",
       " '4j84.pdb',\n",
       " '6y3m.pdb',\n",
       " '5ctv.pdb',\n",
       " '4qxx.pdb',\n",
       " '5njf.pdb',\n",
       " '7etu.pdb',\n",
       " '6f4s.pdb',\n",
       " '6z00.pdb',\n",
       " '7oju.pdb',\n",
       " '1hhz.pdb',\n",
       " '4j82.pdb',\n",
       " '5r48.pdb',\n",
       " '1skg.pdb',\n",
       " '5r45.pdb',\n",
       " '4zhb.pdb',\n",
       " '5r42.pdb',\n",
       " '4v3i.pdb',\n",
       " '5onp.pdb',\n",
       " '5r47.pdb',\n",
       " '1sha.pdb',\n",
       " '4x3o.pdb',\n",
       " '5r4c.pdb',\n",
       " '5onq.pdb',\n",
       " '5n99.pdb',\n",
       " '3drj.pdb',\n",
       " '7kd7.pdb',\n",
       " '5r4a.pdb',\n",
       " '7z5z.pdb',\n",
       " '7kpu.pdb',\n",
       " '5r4b.pdb',\n",
       " '5r4d.pdb',\n",
       " '3hds.pdb',\n",
       " '5r44.pdb',\n",
       " '6diy.pdb',\n",
       " '6rd2.pdb',\n",
       " '6fbb.pdb',\n",
       " '5r43.pdb',\n",
       " '6f4r.pdb',\n",
       " '2d5w.pdb',\n",
       " '7ett.pdb',\n",
       " '5r49.pdb',\n",
       " '7etv.pdb',\n",
       " '6eaw.pdb',\n",
       " '7pul.pdb',\n",
       " '6slg.pdb',\n",
       " '6ax4.pdb',\n",
       " '6f4t.pdb',\n",
       " '1tjk.pdb',\n",
       " '4l9p.pdb',\n",
       " '5nf0.pdb',\n",
       " '4olr.pdb',\n",
       " '4j78.pdb',\n",
       " '7x70.pdb']"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "os.listdir(\"tmp/pdbs\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Writing Individual Chains\n",
    "\n",
    "We can also extract the individual chains from the PDB files in our selection:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/29/23 01:45:08] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Downloading <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">54</span> PDB files<span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                           <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1504\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/29/23 01:45:08]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Downloading \u001b[1;36m54\u001b[0m PDB files\u001b[33m...\u001b[0m                                           \u001b]8;id=132264;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=24817;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1504\u001b\\\u001b[2m1504\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a849ccd67465484eb4dd288e453c5a81",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/54 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/29/23 01:46:00] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Done downloading PDB files                                            <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1508\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1508</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/29/23 01:46:00]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Done downloading PDB files                                            \u001b]8;id=768148;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=81637;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1508\u001b\\\u001b[2m1508\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">                    </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Extracting chains<span style=\"color: #808000; text-decoration-color: #808000\">...</span>                                                  <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1511\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1511</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m                   \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Extracting chains\u001b[33m...\u001b[0m                                                  \u001b]8;id=368030;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=807209;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1511\u001b\\\u001b[2m1511\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/54 [00:00<?, ?it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "  6%|▌         | 3/54 [00:00<00:02, 22.50it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 11%|█         | 6/54 [00:00<00:03, 14.05it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 15%|█▍        | 8/54 [00:00<00:03, 13.76it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 19%|█▊        | 10/54 [00:00<00:03, 11.58it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 22%|██▏       | 12/54 [00:01<00:04, 10.14it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 28%|██▊       | 15/54 [00:01<00:03, 12.66it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 31%|███▏      | 17/54 [00:01<00:02, 13.81it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 35%|███▌      | 19/54 [00:01<00:04,  8.09it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 39%|███▉      | 21/54 [00:02<00:05,  6.18it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 43%|████▎     | 23/54 [00:02<00:05,  5.58it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 46%|████▋     | 25/54 [00:02<00:04,  6.78it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 50%|█████     | 27/54 [00:03<00:03,  8.10it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 54%|█████▎    | 29/54 [00:03<00:02,  9.42it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 57%|█████▋    | 31/54 [00:03<00:02, 10.56it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 61%|██████    | 33/54 [00:03<00:01, 11.51it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 65%|██████▍   | 35/54 [00:03<00:01, 11.68it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 69%|██████▊   | 37/54 [00:03<00:01, 13.27it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 72%|███████▏  | 39/54 [00:03<00:01, 11.92it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 76%|███████▌  | 41/54 [00:04<00:01, 11.53it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 80%|███████▉  | 43/54 [00:04<00:00, 11.41it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 83%|████████▎ | 45/54 [00:04<00:00, 12.32it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 91%|█████████ | 49/54 [00:04<00:00, 14.77it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 94%|█████████▍| 51/54 [00:04<00:00, 13.33it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      " 98%|█████████▊| 53/54 [00:04<00:00, 13.44it/s]/home/atj39/mambaforge/envs/graphein/lib/python3.9/site-packages/biopandas/pdb/pandas_pdb.py:624: UserWarning: Column model_id is not an expected column and will be skipped.\n",
      "  warn(\n",
      "100%|██████████| 54/54 [00:05<00:00, 10.80it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[03/29/23 01:46:05] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Done extracting chains                                                <a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">pdb_data.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1520\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1520</span></a>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[2;36m[03/29/23 01:46:05]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Done extracting chains                                                \u001b]8;id=451212;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py\u001b\\\u001b[2mpdb_data.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=93851;file:///home/atj39/github/graphein/graphein/ml/datasets/pdb_data.py#1520\u001b\\\u001b[2m1520\u001b[0m\u001b]8;;\u001b\\\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "[PosixPath('pdb/1hhz_D.pdb'),\n",
       " PosixPath('pdb/1hhz_E.pdb'),\n",
       " PosixPath('pdb/1hhz_F.pdb'),\n",
       " PosixPath('pdb/1sha_B.pdb'),\n",
       " PosixPath('pdb/1skg_B.pdb'),\n",
       " PosixPath('pdb/1tjk_I.pdb'),\n",
       " PosixPath('pdb/2d5w_C.pdb'),\n",
       " PosixPath('pdb/2d5w_D.pdb'),\n",
       " PosixPath('pdb/3drj_B.pdb'),\n",
       " PosixPath('pdb/3hds_E.pdb'),\n",
       " PosixPath('pdb/3hds_F.pdb'),\n",
       " PosixPath('pdb/4j78_B.pdb'),\n",
       " PosixPath('pdb/4j82_C.pdb'),\n",
       " PosixPath('pdb/4j82_D.pdb'),\n",
       " PosixPath('pdb/4j84_C.pdb'),\n",
       " PosixPath('pdb/4j84_D.pdb'),\n",
       " PosixPath('pdb/4l9p_C.pdb'),\n",
       " PosixPath('pdb/4olr_A.pdb'),\n",
       " PosixPath('pdb/4olr_B.pdb'),\n",
       " PosixPath('pdb/4qxx_Z.pdb'),\n",
       " PosixPath('pdb/4v3i_B.pdb'),\n",
       " PosixPath('pdb/4x3o_C.pdb'),\n",
       " PosixPath('pdb/4zhb_B.pdb'),\n",
       " PosixPath('pdb/5ctv_C.pdb'),\n",
       " PosixPath('pdb/5ctv_E.pdb'),\n",
       " PosixPath('pdb/5n99_C.pdb'),\n",
       " PosixPath('pdb/5n99_E.pdb'),\n",
       " PosixPath('pdb/5n99_F.pdb'),\n",
       " PosixPath('pdb/5n99_H.pdb'),\n",
       " PosixPath('pdb/5n99_J.pdb'),\n",
       " PosixPath('pdb/5n99_L.pdb'),\n",
       " PosixPath('pdb/5n99_N.pdb'),\n",
       " PosixPath('pdb/5n99_P.pdb'),\n",
       " PosixPath('pdb/5n99_R.pdb'),\n",
       " PosixPath('pdb/5n99_T.pdb'),\n",
       " PosixPath('pdb/5n99_V.pdb'),\n",
       " PosixPath('pdb/5n99_X.pdb'),\n",
       " PosixPath('pdb/5nf0_F.pdb'),\n",
       " PosixPath('pdb/5njf_E.pdb'),\n",
       " PosixPath('pdb/5njf_F.pdb'),\n",
       " PosixPath('pdb/5onp_B.pdb'),\n",
       " PosixPath('pdb/5onq_B.pdb'),\n",
       " PosixPath('pdb/5r42_E.pdb'),\n",
       " PosixPath('pdb/5r43_E.pdb'),\n",
       " PosixPath('pdb/5r44_E.pdb'),\n",
       " PosixPath('pdb/5r45_E.pdb'),\n",
       " PosixPath('pdb/5r46_E.pdb'),\n",
       " PosixPath('pdb/5r47_E.pdb'),\n",
       " PosixPath('pdb/5r48_E.pdb'),\n",
       " PosixPath('pdb/5r49_E.pdb'),\n",
       " PosixPath('pdb/5r4a_E.pdb'),\n",
       " PosixPath('pdb/5r4b_E.pdb'),\n",
       " PosixPath('pdb/5r4c_E.pdb'),\n",
       " PosixPath('pdb/5r4d_D.pdb'),\n",
       " PosixPath('pdb/5r4d_E.pdb'),\n",
       " PosixPath('pdb/6ax4_C.pdb'),\n",
       " PosixPath('pdb/6diy_A.pdb'),\n",
       " PosixPath('pdb/6eaw_I.pdb'),\n",
       " PosixPath('pdb/6f4r_B.pdb'),\n",
       " PosixPath('pdb/6f4s_B.pdb'),\n",
       " PosixPath('pdb/6f4t_B.pdb'),\n",
       " PosixPath('pdb/6fbb_P.pdb'),\n",
       " PosixPath('pdb/6rd2_C.pdb'),\n",
       " PosixPath('pdb/6rd2_D.pdb'),\n",
       " PosixPath('pdb/6slg_B.pdb'),\n",
       " PosixPath('pdb/6y3m_P.pdb'),\n",
       " PosixPath('pdb/6z00_C.pdb'),\n",
       " PosixPath('pdb/6z00_D.pdb'),\n",
       " PosixPath('pdb/7ett_B.pdb'),\n",
       " PosixPath('pdb/7etu_B.pdb'),\n",
       " PosixPath('pdb/7etv_B.pdb'),\n",
       " PosixPath('pdb/7kd7_E.pdb'),\n",
       " PosixPath('pdb/7kd7_B.pdb'),\n",
       " PosixPath('pdb/7kpu_E.pdb'),\n",
       " PosixPath('pdb/7kpu_B.pdb'),\n",
       " PosixPath('pdb/7oju_H.pdb'),\n",
       " PosixPath('pdb/7pul_P.pdb'),\n",
       " PosixPath('pdb/7x70_B.pdb'),\n",
       " PosixPath('pdb/7z5z_C.pdb')]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): presumably drops entries whose structures cannot be fetched,\n",
    "# with update=True applying the filter to the manager itself - confirm against the PDBManager docs.\n",
    "manager.remove_unavailable_pdbs(update=True)\n",
    "\n",
    "# Per the logged output, this downloads the selected PDB files and then extracts chains;\n",
    "# the returned list of per-chain file paths is shown as the cell result.\n",
    "written_chain_paths = manager.write_chains()\n",
    "written_chain_paths"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphein",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
